Add support for exporting portable handles to GPU allocations.
Adds hsa_amd_portable_export_dmabuf and hsa_amd_portable_close_dmabuf
which allow obtaining dmabuf handles to rocr allocations. These handles
may be shared with other APIs to support cross vendor & cross device
memory sharing.
Adds query to return whether dmabuf export is supported
Signed-off-by: Jonathan Kim <Jonathan.Kim@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Change-Id: I7f98501087d9563d07fc2cb428cc886b1e518b1e
[ROCm/ROCR-Runtime commit: 42243c1e8f]
Bu işleme şunda yer alıyor:
işlemeyi yapan:
David Yat Sin
ebeveyn
57064af98d
işleme
deee152909
@@ -85,7 +85,7 @@ if (ROCM_CCACHE_BUILD)
|
||||
endif() # if (ROCM_CCACHE_BUILD)
|
||||
|
||||
## Get version strings
|
||||
get_version ( "1.8.0" )
|
||||
get_version ( "1.9.0" )
|
||||
if ( ${ROCM_PATCH_VERSION} )
|
||||
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
endif()
|
||||
|
||||
@@ -1212,6 +1212,17 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size,
|
||||
is_data_loss);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset) {
|
||||
return amdExtTable->hsa_amd_portable_export_dmabuf_fn(ptr, size, dmabuf, offset);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf) {
|
||||
return amdExtTable->hsa_amd_portable_close_dmabuf_fn(dmabuf);
|
||||
}
|
||||
|
||||
// Tools only table interfaces.
|
||||
namespace rocr {
|
||||
|
||||
|
||||
@@ -291,6 +291,13 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size,
|
||||
uint32_t* size_copied, void* dest,
|
||||
bool* is_data_loss);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf);
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
|
||||
@@ -338,6 +338,10 @@ class Runtime {
|
||||
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
|
||||
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);
|
||||
|
||||
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset);
|
||||
|
||||
hsa_status_t DmaBufClose(int dmabuf);
|
||||
|
||||
const std::vector<Agent*>& cpu_agents() { return cpu_agents_; }
|
||||
|
||||
const std::vector<Agent*>& gpu_agents() { return gpu_agents_; }
|
||||
|
||||
@@ -400,6 +400,8 @@ void HsaApiTable::UpdateAmdExts() {
|
||||
amd_ext_api.hsa_amd_spm_acquire_fn = AMD::hsa_amd_spm_acquire;
|
||||
amd_ext_api.hsa_amd_spm_release_fn = AMD::hsa_amd_spm_release;
|
||||
amd_ext_api.hsa_amd_spm_set_dest_buffer_fn = AMD::hsa_amd_spm_set_dest_buffer;
|
||||
amd_ext_api.hsa_amd_portable_export_dmabuf_fn = AMD::hsa_amd_portable_export_dmabuf;
|
||||
amd_ext_api.hsa_amd_portable_close_dmabuf_fn = AMD::hsa_amd_portable_close_dmabuf;
|
||||
}
|
||||
|
||||
void LoadInitialHsaApiTable() {
|
||||
|
||||
@@ -100,6 +100,11 @@ struct ValidityError<const T*> {
|
||||
if ((ptr) == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \
|
||||
} while (false)
|
||||
|
||||
// Argument guard: makes the enclosing function return
// HSA_STATUS_ERROR_INVALID_ARGUMENT when arg compares equal to 0.
// Wrapped in do/while(false) so the macro behaves as a single statement.
#define IS_ZERO(arg)                                          \
  do {                                                        \
    if (0 == (arg)) return HSA_STATUS_ERROR_INVALID_ARGUMENT; \
  } while (false)
|
||||
|
||||
#define IS_VALID(ptr) \
|
||||
do { \
|
||||
if ((ptr) == NULL || !(ptr)->IsValid()) \
|
||||
@@ -1167,7 +1172,24 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
CATCH;
|
||||
}
|
||||
|
||||
// Validates the caller's arguments and then delegates the export to the
// runtime singleton's DmaBufExport.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when ptr, dmabuf or offset is NULL
// or when size is zero; otherwise returns whatever DmaBufExport reports.
// NOTE(review): IS_OPEN() presumably rejects calls made while the runtime is
// not initialized; its definition is not visible here.
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
                                            uint64_t* offset) {
  TRY;
  IS_OPEN();
  // Every argument check below yields the same status and has no side
  // effects, so their relative order does not affect the result.
  IS_ZERO(size);
  IS_BAD_PTR(offset);
  IS_BAD_PTR(dmabuf);
  IS_BAD_PTR(ptr);
  auto& runtime = *core::Runtime::runtime_singleton_;
  return runtime.DmaBufExport(ptr, size, dmabuf, offset);
  CATCH;
}
|
||||
|
||||
// Closes a dmabuf handle previously obtained from
// hsa_amd_portable_export_dmabuf by delegating to the runtime singleton's
// DmaBufClose and returning its status.
//
// The IS_OPEN() guard mirrors hsa_amd_portable_export_dmabuf: the runtime
// singleton must not be dereferenced before the runtime is usable, and the
// public header documents HSA_STATUS_ERROR_NOT_INITIALIZED for that case.
// NOTE(review): IS_OPEN() is defined outside this view; it is assumed to
// return HSA_STATUS_ERROR_NOT_INITIALIZED when the runtime is not open.
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf) {
  TRY;
  IS_OPEN();
  return core::Runtime::runtime_singleton_->DmaBufClose(dmabuf);
  CATCH;
}
|
||||
|
||||
|
||||
@@ -685,6 +685,18 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
|
||||
*((bool*)value) = g_use_mwaitx;
|
||||
break;
|
||||
}
|
||||
case HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED: {
|
||||
auto kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version;
|
||||
|
||||
// Implemented in KFD in 1.12
|
||||
if (kfd_version.KernelInterfaceMajorVersion > 1 ||
|
||||
kfd_version.KernelInterfaceMajorVersion == 1 &&
|
||||
kfd_version.KernelInterfaceMinorVersion >= 12)
|
||||
*(reinterpret_cast<bool*>(value)) = true;
|
||||
else
|
||||
*(reinterpret_cast<bool*>(value)) = false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -2300,5 +2312,52 @@ Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
|
||||
return agents_by_node_[prefetch_node][0];
|
||||
}
|
||||
|
||||
// Exports the range [ptr, ptr + size) as a dma-buf file descriptor.
//
// The range must lie entirely inside one entry of allocation_map_, and that
// entry's region must be owned by an agent whose device type is
// Agent::kAmdGpuDevice. On success *dmabuf receives the descriptor and *offset
// the offset reported by hsaKmtExportDMABufHandle; on any failure neither
// output is written.
//
// Returns:
//   HSA_STATUS_SUCCESS                   - export succeeded.
//   HSA_STATUS_ERROR_INVALID_ALLOCATION  - range is not within a single tracked allocation.
//   HSA_STATUS_ERROR_INVALID_AGENT       - allocation is not owned by a GPU agent.
//   HSA_STATUS_ERROR_OUT_OF_RESOURCES    - thunk reported HSAKMT_STATUS_ERROR.
//   HSA_STATUS_ERROR                     - any other thunk failure.
//   HSA_STATUS_ERROR_NOT_INITIALIZED     - built for a non-Linux platform.
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset) {
#ifdef __linux__
  ScopedAcquire<KernelSharedMutex::Shared> lock(memory_lock_.shared());

  // Lookup containing allocation: upper_bound yields the first entry whose key
  // is greater than ptr, so the candidate container is the entry before it.
  auto mem = allocation_map_.upper_bound(ptr);
  if (mem == allocation_map_.begin()) return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  mem--;

  const auto alloc_size = mem->second.size;
  const bool starts_inside =
      (mem->first <= ptr) && (ptr < reinterpret_cast<const uint8_t*>(mem->first) + alloc_size);
  if (!starts_inside) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Check size is in bounds. ptr is known to lie inside the allocation, so
  // start_offset < alloc_size and the subtraction cannot underflow. Comparing
  // against the remaining space avoids the wraparound that
  // "start_offset + size" would suffer for very large size values.
  const uintptr_t start_offset = uintptr_t(ptr) - uintptr_t(mem->first);
  if (size > alloc_size - start_offset) return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  // Check allocation is on GPU
  if (mem->second.region->owner()->device_type() != Agent::kAmdGpuDevice)
    return HSA_STATUS_ERROR_INVALID_AGENT;

  // Results are staged in locals so the caller's outputs stay untouched unless
  // the thunk call succeeds.
  int fd = -1;
  uint64_t off = 0;
  HSAKMT_STATUS err = hsaKmtExportDMABufHandle(const_cast<void*>(ptr), size, &fd, &off);
  if (err == HSAKMT_STATUS_SUCCESS) {
    *dmabuf = fd;
    *offset = off;
    return HSA_STATUS_SUCCESS;
  }

  assert((err != HSAKMT_STATUS_INVALID_PARAMETER) &&
         "Thunk does not recognize an expected allocation.");
  if (err == HSAKMT_STATUS_ERROR) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  return HSA_STATUS_ERROR;
#else
  return HSA_STATUS_ERROR_NOT_INITIALIZED;
#endif
}
|
||||
|
||||
// Releases a dma-buf file descriptor by calling close() on it.
//
// Returns HSA_STATUS_SUCCESS when close() returns 0 and
// HSA_STATUS_ERROR_RESOURCE_FREE for any other result. On non-Linux builds
// nothing is closed and HSA_STATUS_ERROR_NOT_INITIALIZED is returned.
hsa_status_t Runtime::DmaBufClose(int dmabuf) {
#ifdef __linux__
  const bool closed = (close(dmabuf) == 0);
  return closed ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_RESOURCE_FREE;
#else
  return HSA_STATUS_ERROR_NOT_INITIALIZED;
#endif
}
|
||||
|
||||
} // namespace core
|
||||
} // namespace rocr
|
||||
|
||||
@@ -231,6 +231,8 @@ global:
|
||||
hsa_amd_spm_acquire;
|
||||
hsa_amd_spm_release;
|
||||
hsa_amd_spm_set_dest_buffer;
|
||||
hsa_amd_portable_export_dmabuf;
|
||||
hsa_amd_portable_close_dmabuf;
|
||||
|
||||
local:
|
||||
*;
|
||||
|
||||
@@ -499,7 +499,12 @@ typedef enum {
|
||||
* Returns true if mwaitx is enabled on this system
|
||||
* The type of this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203
|
||||
HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203,
|
||||
/**
|
||||
* Returns true if DMABUF APIs are supported by the driver. The type of
|
||||
* this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED = 0x204
|
||||
} hsa_system_info_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -192,6 +192,8 @@ struct AmdExtTable {
|
||||
decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
|
||||
decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
|
||||
decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf)* hsa_amd_portable_export_dmabuf_fn;
|
||||
decltype(hsa_amd_portable_close_dmabuf)* hsa_amd_portable_close_dmabuf_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
|
||||
@@ -48,8 +48,12 @@
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
|
||||
/*
|
||||
* - 1.0 - initial version
|
||||
* - 1.1 - dmabuf export
|
||||
*/
|
||||
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 0
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -2581,6 +2585,76 @@ hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent);
|
||||
hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes,
|
||||
uint32_t* timeout, uint32_t* size_copied, void* dest,
|
||||
bool* is_data_loss);
|
||||
/**
|
||||
* @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
|
||||
*
|
||||
* Obtains an OS specific handle to GPU agent memory. The memory must be part
|
||||
* of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
|
||||
* The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
|
||||
* to the allocation.
|
||||
*
|
||||
* Shared access to the memory is not guaranteed to be fine grain coherent even
|
||||
* if the allocation exported is from a fine grain pool. The shared memory
|
||||
* consistency model will be no stronger than the model exported from, consult
|
||||
* the importing API to determine the final consistency model.
|
||||
*
|
||||
* The allocation's memory remains valid as long as the handle and any mapping
|
||||
* of the handle remains valid. When the handle and all mappings are closed
|
||||
* the backing memory will be released for reuse.
|
||||
*
|
||||
* @param[in] ptr Pointer to the allocation being exported.
|
||||
*
|
||||
* @param[in] size Size in bytes to export following @p ptr. The entire range
|
||||
* being exported must be contained within a single allocation.
|
||||
*
|
||||
* @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the
|
||||
* allocation. Contents will not be altered in the event of failure.
|
||||
*
|
||||
* @param[out] offset Offset in bytes into the memory referenced by the dma-buf
|
||||
* object at which @p ptr resides. Contents will not be altered in the event
|
||||
* of failure.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Export completed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p ptr, @p dmabuf or @p offset is NULL, or @p size is zero.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by
|
||||
* @p ptr and @p size is not contained within a single allocation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr
|
||||
* and @p size was allocated on a device which can not export memory.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The return file descriptor,
|
||||
* @p dmabuf, could not be created.
|
||||
*/
|
||||
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset);
|
||||
|
||||
/**
|
||||
* @brief Closes an OS specific, vendor neutral, handle to a memory allocation.
|
||||
*
|
||||
* Closes an OS specific handle to GPU agent memory.
|
||||
*
|
||||
* Applications should close a handle after imports are complete. The handle
|
||||
* is not required to remain open for the lifetime of imported mappings. The
|
||||
* referenced allocation will remain valid until all handles and mappings
|
||||
* are closed.
|
||||
*
|
||||
* @param[in] dmabuf Handle to be closed.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Handle closed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_RESOURCE_FREE A generic error was encountered
|
||||
* when closing the handle. The handle may have been closed already or an
|
||||
* async IO error may have occurred.
|
||||
*/
|
||||
hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // end extern "C" block
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle