From deee1529093a5dcce2c4143fa2d9348ec06239ff Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Mon, 17 Jan 2022 14:44:06 -0600 Subject: [PATCH] Add support for exporting portable handles to GPU allocations. Adds hsa_amd_portable_export_dmabuf and hsa_amd_portable_close_dmabuf which allow obtaining dmabuf handles to rocr allocations. These handles may be shared with other APIs to support cross vendor & cross device memory sharing. Adds query to return whether dmabuf export is supported Signed-off-by: Jonathan Kim Signed-off-by: David Yat Sin Change-Id: I7f98501087d9563d07fc2cb428cc886b1e518b1e [ROCm/ROCR-Runtime commit: 42243c1e8ffc72aef91f8d21a2701b32a3c03865] --- .../runtime/hsa-runtime/CMakeLists.txt | 2 +- .../core/common/hsa_table_interface.cpp | 11 +++ .../hsa-runtime/core/inc/hsa_ext_amd_impl.h | 7 ++ .../runtime/hsa-runtime/core/inc/runtime.h | 4 + .../core/runtime/hsa_api_trace.cpp | 2 + .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 22 ++++++ .../hsa-runtime/core/runtime/runtime.cpp | 59 ++++++++++++++ .../runtime/hsa-runtime/hsacore.so.def | 2 + .../runtime/hsa-runtime/inc/hsa.h | 7 +- .../runtime/hsa-runtime/inc/hsa_api_trace.h | 2 + .../runtime/hsa-runtime/inc/hsa_ext_amd.h | 76 ++++++++++++++++++- 11 files changed, 191 insertions(+), 3 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt index 88c18c011d..f0e14c3b6e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt +++ b/projects/rocr-runtime/runtime/hsa-runtime/CMakeLists.txt @@ -85,7 +85,7 @@ if (ROCM_CCACHE_BUILD) endif() # if (ROCM_CCACHE_BUILD) ## Get version strings -get_version ( "1.8.0" ) +get_version ( "1.9.0" ) if ( ${ROCM_PATCH_VERSION} ) set ( VERSION_PATCH ${ROCM_PATCH_VERSION}) endif() diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 2fec272d58..3802ae9b49 
100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1212,6 +1212,17 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, is_data_loss); } +// Mirrors Amd Extension Apis: forwards the dmabuf export call to the amdExtTable entry. +hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf, + uint64_t* offset) { + return amdExtTable->hsa_amd_portable_export_dmabuf_fn(ptr, size, dmabuf, offset); +} + +// Mirrors Amd Extension Apis: forwards the dmabuf close call to the amdExtTable entry. +hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf) { + return amdExtTable->hsa_amd_portable_close_dmabuf_fn(dmabuf); +} + // Tools only table interfaces. namespace rocr { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 75b5972cdb..bbe5b91b5c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -291,6 +291,13 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* size_copied, void* dest, bool* is_data_loss); +// Mirrors Amd Extension Apis: exports a dmabuf handle for the range starting at ptr. +hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf, + uint64_t* offset); + +// Mirrors Amd Extension Apis: closes a dmabuf handle. +hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf); + } // namespace amd } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index dd38f615a3..11e8ce82b1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -338,6 +338,10 @@ class Runtime { hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals, const hsa_signal_t* 
dep_signals, hsa_signal_t completion_signal); + hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset); + + hsa_status_t DmaBufClose(int dmabuf); + const std::vector& cpu_agents() { return cpu_agents_; } const std::vector& gpu_agents() { return gpu_agents_; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index 5f81eec690..fcaebf5b05 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -400,6 +400,8 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_spm_acquire_fn = AMD::hsa_amd_spm_acquire; amd_ext_api.hsa_amd_spm_release_fn = AMD::hsa_amd_spm_release; amd_ext_api.hsa_amd_spm_set_dest_buffer_fn = AMD::hsa_amd_spm_set_dest_buffer; + amd_ext_api.hsa_amd_portable_export_dmabuf_fn = AMD::hsa_amd_portable_export_dmabuf; + amd_ext_api.hsa_amd_portable_close_dmabuf_fn = AMD::hsa_amd_portable_close_dmabuf; } void LoadInitialHsaApiTable() { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 43fd38939c..a4feac9894 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -100,6 +100,12 @@ struct ValidityError { if ((ptr) == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \ } while (false) +// Makes the enclosing function return HSA_STATUS_ERROR_INVALID_ARGUMENT when arg is zero. +#define IS_ZERO(arg) \ + do { \ + if ((arg) == 0) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \ + } while (false) + #define IS_VALID(ptr) \ do { \ if ((ptr) == NULL || !(ptr)->IsValid()) \ @@ -1167,7 +1173,27 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz return HSA_STATUS_ERROR; return HSA_STATUS_SUCCESS; + CATCH; +} +// Rejects NULL pointers and a zero size, then asks the runtime to export the range as a dmabuf. +hsa_status_t hsa_amd_portable_export_dmabuf(const void* 
ptr, size_t size, int* dmabuf, + uint64_t* offset) { + TRY; + IS_OPEN(); + IS_BAD_PTR(ptr); + IS_BAD_PTR(dmabuf); + IS_BAD_PTR(offset); + IS_ZERO(size); + return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf, offset); + CATCH; +} + +// Closes a dmabuf handle obtained from hsa_amd_portable_export_dmabuf. +hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf) { + TRY; + IS_OPEN();  // Same guard as export: do not use runtime_singleton_ unless the runtime is open. + return core::Runtime::runtime_singleton_->DmaBufClose(dmabuf); CATCH; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 4fb212691e..b5ae773d40 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -685,6 +685,18 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) { *((bool*)value) = g_use_mwaitx; break; } + case HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED: { + auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version; + + // Implemented in KFD in 1.12 + if (kfd_version.KernelInterfaceMajorVersion > 1 || + (kfd_version.KernelInterfaceMajorVersion == 1 && + kfd_version.KernelInterfaceMinorVersion >= 12)) + *(reinterpret_cast<bool*>(value)) = true; + else + *(reinterpret_cast<bool*>(value)) = false; + break; + } default: return HSA_STATUS_ERROR_INVALID_ARGUMENT; } @@ -2300,5 +2312,55 @@ Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) { return agents_by_node_[prefetch_node][0]; } +// Exports [ptr, ptr + size) as a dmabuf fd. The range must lie inside one tracked GPU allocation. +// *dmabuf and *offset are written only on success. Non-Linux builds return NOT_INITIALIZED. +hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset) { +#ifdef __linux__ + ScopedAcquire lock(memory_lock_.shared()); + // Lookup containing allocation. + auto mem = allocation_map_.upper_bound(ptr); + if (mem != allocation_map_.begin()) { + mem--; + if ((mem->first <= ptr) && + (ptr < reinterpret_cast<const uint8_t*>(mem->first) + mem->second.size)) { + // Check size is in bounds (compare with the bytes left after ptr so a huge size cannot wrap). 
+ if (size <= mem->second.size - (uintptr_t(ptr) - uintptr_t(mem->first))) { + // Check allocation is on GPU + if (mem->second.region->owner()->device_type() != Agent::kAmdGpuDevice) + return HSA_STATUS_ERROR_INVALID_AGENT; + + int fd; + uint64_t off; + HSAKMT_STATUS err = hsaKmtExportDMABufHandle(const_cast<void*>(ptr), size, &fd, &off); + if (err == HSAKMT_STATUS_SUCCESS) { + *dmabuf = fd; + *offset = off; + return HSA_STATUS_SUCCESS; + } + + assert((err != HSAKMT_STATUS_INVALID_PARAMETER) && + "Thunk does not recognize an expected allocation."); + if (err == HSAKMT_STATUS_ERROR) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + return HSA_STATUS_ERROR; + } + } + } + return HSA_STATUS_ERROR_INVALID_ALLOCATION; +#else + return HSA_STATUS_ERROR_NOT_INITIALIZED; +#endif +} + +// Closes the dmabuf fd with close(); any failure is reported as HSA_STATUS_ERROR_RESOURCE_FREE. +hsa_status_t Runtime::DmaBufClose(int dmabuf) { +#ifdef __linux__ + int err = close(dmabuf); + if (err == 0) return HSA_STATUS_SUCCESS; + return HSA_STATUS_ERROR_RESOURCE_FREE; +#else + return HSA_STATUS_ERROR_NOT_INITIALIZED; +#endif +} + } // namespace core } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def index 07f4e93749..bd74f30661 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def @@ -231,6 +231,8 @@ global: hsa_amd_spm_acquire; hsa_amd_spm_release; hsa_amd_spm_set_dest_buffer; + hsa_amd_portable_export_dmabuf; + hsa_amd_portable_close_dmabuf; local: *; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h index 8b668f1e9e..3c0db5d52b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h @@ -499,7 +499,12 @@ typedef enum { * Returns true if mwaitx is enabled on this system * The type of this attribute is bool. 
*/ - HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203 + HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203, + /** + * Returns true if the dmabuf export APIs are supported by the driver. The + * type of this attribute is bool. + */ + HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED = 0x204 } hsa_system_info_t; /** diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h index 3954b51a31..3c613dc393 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -192,6 +192,8 @@ struct AmdExtTable { decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn; decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn; decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn; + decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn; + decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn; }; // Table to export HSA Core Runtime Apis diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index a094001c2b..669894a173 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -48,8 +48,12 @@ #include "hsa.h" #include "hsa_ext_image.h" +/* + * - 1.0 - initial version + * - 1.1 - dmabuf export (hsa_amd_portable_export_dmabuf, hsa_amd_portable_close_dmabuf) + */ #define HSA_AMD_INTERFACE_VERSION_MAJOR 1 -#define HSA_AMD_INTERFACE_VERSION_MINOR 0 +#define HSA_AMD_INTERFACE_VERSION_MINOR 1 #ifdef __cplusplus extern "C" { @@ -2581,6 +2585,76 @@ hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent); hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes, uint32_t* timeout, uint32_t* size_copied, void* dest, bool* is_data_loss); +/** + * @brief Obtains an OS specific, vendor neutral, handle to a memory allocation. 
+ * + * Obtains an OS specific handle to GPU agent memory. The memory must be part + * of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent. + * The handle may be used with other APIs (e.g. Vulkan) to obtain shared access + * to the allocation. + * + * Shared access to the memory is not guaranteed to be fine grain coherent even + * if the allocation exported is from a fine grain pool. The shared memory + * consistency model will be no stronger than the model exported from; consult + * the importing API to determine the final consistency model. + * + * The allocation's memory remains valid as long as the handle and any mapping + * of the handle remain valid. When the handle and all mappings are closed + * the backing memory will be released for reuse. + * + * @param[in] ptr Pointer to the start of the range being exported. + * + * @param[in] size Size in bytes to export following @p ptr. The entire range + * being exported must be contained within a single allocation. + * + * @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the + * allocation. Contents will not be altered in the event of failure. + * + * @param[out] offset Offset in bytes into the memory referenced by the dma-buf + * object at which @p ptr resides. Contents will not be altered in the event + * of failure. + * + * @retval ::HSA_STATUS_SUCCESS Export completed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT A pointer argument is NULL or @p size is zero. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by + * @p ptr and @p size is not contained within a single allocation. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr + * and @p size was allocated on a device which cannot export memory. 
+ * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The returned file descriptor, + * @p dmabuf, could not be created. + */ +hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf, + uint64_t* offset); + +/** + * @brief Closes an OS specific, vendor neutral, handle to a memory allocation. + * + * Closes an OS specific handle to GPU agent memory. + * + * Applications should close a handle after imports are complete. The handle + * is not required to remain open for the lifetime of imported mappings. The + * referenced allocation will remain valid until all handles and mappings + * are closed. + * + * @param[in] dmabuf Handle to be closed. + * + * @retval ::HSA_STATUS_SUCCESS Handle closed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_RESOURCE_FREE A generic error was encountered + * when closing the handle. The handle may have been closed already or an + * async IO error may have occurred. + */ +hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf); #ifdef __cplusplus } // end extern "C" block