rocr: Add hsa_amd_portable_export_dmabuf_v2
The original version of hsa_amd_portable_export_dmabuf() did not consider the conditions under which a dmabuf could be shared. The new version, hsa_amd_portable_export_dmabuf_v2(), takes an additional flags argument. A caller that wants to share the dmabuf over PCIe sets the flag HSA_AMD_DMABUF_MAPPING_TYPE_PCIE. In that case, if the GPU that owns the memory is a PCIe GPU that is not in an XGMI hive and large-BAR is not supported, the export fails with HSA_STATUS_ERROR_NOT_SUPPORTED. The original function is kept and behaves like the new one called with HSA_AMD_DMABUF_MAPPING_TYPE_NONE.
Этот коммит содержится в:
коммит произвёл
Freehill, Chris
родитель
dce52be686
Коммит
3a9d14bb66
@@ -87,7 +87,7 @@ include(utils)
|
||||
|
||||
|
||||
## Get version strings
|
||||
get_version("1.16.0")
|
||||
get_version("1.17.0")
|
||||
if (${ROCM_PATCH_VERSION})
|
||||
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
endif()
|
||||
|
||||
@@ -1246,6 +1246,12 @@ hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size
|
||||
return amdExtTable->hsa_amd_portable_export_dmabuf_fn(ptr, size, dmabuf, offset);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset, uint64_t flags) {
|
||||
return amdExtTable->hsa_amd_portable_export_dmabuf_v2_fn(ptr, size, dmabuf, offset, flags);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf) {
|
||||
return amdExtTable->hsa_amd_portable_close_dmabuf_fn(dmabuf);
|
||||
|
||||
@@ -302,6 +302,9 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size,
|
||||
uint32_t* size_copied, void* dest,
|
||||
bool* is_data_loss);
|
||||
|
||||
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(const void* ptr,
|
||||
size_t size, int* dmabuf, uint64_t* offset, uint64_t flags);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset);
|
||||
|
||||
@@ -375,7 +375,8 @@ class Runtime {
|
||||
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
|
||||
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);
|
||||
|
||||
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset);
|
||||
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset, uint64_t flags);
|
||||
|
||||
hsa_status_t DmaBufClose(int dmabuf);
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ void HsaApiTable::Init() {
|
||||
// they can add preprocessor macros on the new functions
|
||||
|
||||
constexpr size_t expected_core_api_table_size = 1016;
|
||||
constexpr size_t expected_amd_ext_table_size = 600;
|
||||
constexpr size_t expected_amd_ext_table_size = 608;
|
||||
constexpr size_t expected_image_ext_table_size = 128;
|
||||
constexpr size_t expected_finalizer_ext_table_size = 64;
|
||||
constexpr size_t expected_tools_table_size = 64;
|
||||
@@ -476,6 +476,7 @@ void HsaApiTable::UpdateAmdExts() {
|
||||
amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
|
||||
amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
|
||||
amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine;
|
||||
amd_ext_api.hsa_amd_portable_export_dmabuf_v2_fn = AMD::hsa_amd_portable_export_dmabuf_v2;
|
||||
}
|
||||
|
||||
void HsaApiTable::UpdateTools() {
|
||||
|
||||
@@ -1307,14 +1307,28 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset) {
|
||||
uint64_t* offset) {
|
||||
TRY;
|
||||
IS_OPEN();
|
||||
IS_BAD_PTR(ptr);
|
||||
IS_BAD_PTR(dmabuf);
|
||||
IS_BAD_PTR(offset);
|
||||
IS_ZERO(size);
|
||||
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf,
|
||||
offset, HSA_AMD_DMABUF_MAPPING_TYPE_NONE);
|
||||
CATCH;
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
|
||||
int* dmabuf, uint64_t* offset, uint64_t flags) {
|
||||
TRY;
|
||||
IS_OPEN();
|
||||
IS_BAD_PTR(ptr);
|
||||
IS_BAD_PTR(dmabuf);
|
||||
IS_BAD_PTR(offset);
|
||||
IS_ZERO(size);
|
||||
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf, offset);
|
||||
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size,
|
||||
dmabuf, offset, flags);
|
||||
CATCH;
|
||||
}
|
||||
|
||||
|
||||
@@ -3078,7 +3078,8 @@ Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
|
||||
return agents_by_node_[prefetch_node][0];
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset) {
|
||||
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset, uint64_t flags) {
|
||||
#ifdef __linux__
|
||||
ScopedAcquire<KernelSharedMutex::Shared> lock(memory_lock_.shared());
|
||||
// Lookup containing allocation.
|
||||
@@ -3093,6 +3094,14 @@ hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, ui
|
||||
if (mem->second.region->owner()->device_type() != Agent::kAmdGpuDevice)
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
|
||||
rocr::AMD::GpuAgent* owner =
|
||||
static_cast<AMD::GpuAgent*>(mem->second.region->owner());
|
||||
|
||||
if (flags & HSA_AMD_DMABUF_MAPPING_TYPE_PCIE &&
|
||||
!owner->is_xgmi_cpu_gpu() &&
|
||||
!owner->LargeBarEnabled()) {
|
||||
return (hsa_status_t)HSA_STATUS_ERROR_NOT_SUPPORTED;
|
||||
}
|
||||
int fd;
|
||||
uint64_t off;
|
||||
HSAKMT_STATUS err = HSAKMT_CALL(hsaKmtExportDMABufHandle(const_cast<void*>(ptr), size, &fd, &off));
|
||||
@@ -3319,7 +3328,6 @@ hsa_status_t Runtime::VMemoryHandleUnmap(void* va, size_t size) {
|
||||
if (va_chunk != va_ptr + size) {
|
||||
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
|
||||
}
|
||||
hsa_status_t status;
|
||||
|
||||
for (auto mappedHandleIt : mappedHandles) {
|
||||
// Remove access from all agents that were allowed access
|
||||
|
||||
@@ -234,6 +234,7 @@ global:
|
||||
hsa_amd_spm_release;
|
||||
hsa_amd_spm_set_dest_buffer;
|
||||
hsa_amd_portable_export_dmabuf;
|
||||
hsa_amd_portable_export_dmabuf_v2;
|
||||
hsa_amd_portable_close_dmabuf;
|
||||
hsa_amd_vmem_address_reserve;
|
||||
hsa_amd_vmem_address_reserve_align;
|
||||
|
||||
@@ -270,6 +270,7 @@ struct AmdExtTable {
|
||||
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
|
||||
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
|
||||
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
|
||||
decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
|
||||
@@ -58,7 +58,7 @@
|
||||
// Step Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_STEP_VERSION 0x01
|
||||
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x06
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x07
|
||||
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
|
||||
// Rocprofiler just checks HSA_IMAGE_EXT_API_TABLE_STEP_VERSION
|
||||
|
||||
@@ -59,9 +59,10 @@
|
||||
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
|
||||
* - 1.7 - hsa_amd_signal_wait_all
|
||||
* - 1.8 - hsa_amd_memory_get_preferred_copy_engine
|
||||
* - 1.9 - hsa_amd_portable_export_dmabuf_v2
|
||||
*/
|
||||
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 8
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 9
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -446,6 +447,11 @@ enum {
|
||||
* Resource is busy or temporarily unavailable
|
||||
*/
|
||||
HSA_STATUS_ERROR_RESOURCE_BUSY = 46,
|
||||
|
||||
/**
|
||||
* Request is not supported by this system
|
||||
*/
|
||||
HSA_STATUS_ERROR_NOT_SUPPORTED = 47,
|
||||
};
|
||||
|
||||
/** @} */
|
||||
@@ -759,6 +765,17 @@ typedef enum hsa_amd_coherency_type_s {
|
||||
} hsa_amd_coherency_type_t;
|
||||
|
||||
|
||||
/**
|
||||
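* @brief dma-buf mapping type flags; values are combined into the flags bitmask passed to hsa_amd_portable_export_dmabuf_v2().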
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
typedef enum hsa_amd_dma_buf_mapping_type_s : int {
|
||||
#else
|
||||
typedef enum hsa_amd_dma_buf_mapping_type_s {
|
||||
#endif
|
||||
HSA_AMD_DMABUF_MAPPING_TYPE_NONE = 0,
|
||||
HSA_AMD_DMABUF_MAPPING_TYPE_PCIE = 1
|
||||
} hsa_amd_dma_buf_mapping_type_t;
|
||||
/**
|
||||
* @brief Get the coherency type of the fine grain region of an agent.
|
||||
*
|
||||
@@ -3138,21 +3155,10 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
|
||||
* @brief Older version of hsa_amd_portable_export_dmabuf_v2
|
||||
*
|
||||
* Obtains an OS specific handle to GPU agent memory. The memory must be part
|
||||
* of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
|
||||
* The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
|
||||
* to the allocation.
|
||||
*
|
||||
* Shared access to the memory is not guaranteed to be fine grain coherent even
|
||||
* if the allocation exported is from a fine grain pool. The shared memory
|
||||
* consistency model will be no stronger than the model exported from, consult
|
||||
* the importing API to determine the final consistency model.
|
||||
*
|
||||
* The allocation's memory remains valid as long as the handle and any mapping
|
||||
* of the handle remains valid. When the handle and all mappings are closed
|
||||
* the backing memory will be released for reuse.
|
||||
* This is the same as calling hsa_amd_portable_export_dmabuf_v2() with the
|
||||
* flags argument set to HSA_AMD_DMABUF_MAPPING_TYPE_NONE.
|
||||
*
|
||||
* @param[in] ptr Pointer to the allocation being exported.
|
||||
*
|
||||
@@ -3185,6 +3191,56 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
|
||||
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
|
||||
uint64_t* offset);
|
||||
|
||||
/**
|
||||
* @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
|
||||
*
|
||||
* Obtains an OS specific handle to GPU agent memory. The memory must be part
|
||||
* of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
|
||||
* The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
|
||||
* to the allocation.
|
||||
*
|
||||
* Shared access to the memory is not guaranteed to be fine grain coherent even
|
||||
* if the allocation exported is from a fine grain pool. The shared memory
|
||||
* consistency model will be no stronger than the model exported from, consult
|
||||
* the importing API to determine the final consistency model.
|
||||
*
|
||||
* The allocation's memory remains valid as long as the handle and any mapping
|
||||
* of the handle remains valid. When the handle and all mappings are closed
|
||||
* the backing memory will be released for reuse.
|
||||
*
|
||||
* @param[in] ptr Pointer to the allocation being exported.
|
||||
*
|
||||
* @param[in] size Size in bytes to export following @p ptr. The entire range
|
||||
* being exported must be contained within a single allocation.
|
||||
*
|
||||
* @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the
|
||||
* allocation. Contents will not be altered in the event of failure.
|
||||
*
|
||||
* @param[out] offset Offset in bytes into the memory referenced by the dma-buf
|
||||
* object at which @p ptr resides. Contents will not be altered in the event
|
||||
* of failure.
|
||||
*
|
||||
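* @param[in] flags Bitmask of hsa_amd_dma_buf_mapping_type_t flags. If HSA_AMD_DMABUF_MAPPING_TYPE_PCIE is set and the allocation cannot be shared over PCIe (e.g. large-BAR is not enabled on the owning GPU), the call fails with ::HSA_STATUS_ERROR_NOT_SUPPORTED.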
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS Export completed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
|
||||
* initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT One or more arguments is NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by
|
||||
* @p ptr and @p size is not contained within a single allocation.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr
|
||||
* and @p size was allocated on a device which can not export memory.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The return file descriptor,
|
||||
* @p dmabuf, could not be created.
|
||||
*/
|
||||
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
|
||||
int* dmabuf, uint64_t* offset, uint64_t flags);
|
||||
|
||||
/**
|
||||
* @brief Closes an OS specific, vendor neutral, handle to a memory allocation.
|
||||
*
|
||||
|
||||
Ссылка в новой задаче
Block a user