rocr: Add hsa_amd_portable_export_dmabuf_v2

The original version of hsa_amd_portable_export_dmabuf() did not
consider the conditions under which a dmabuf could be shared.
In the new version (hsa_amd_portable_export_dmabuf_v2()), the caller
can specify the flag HSA_AMD_DMABUF_MAPPING_TYPE_PCIE, which means they
want to share the dmabuf over PCIe. In that case, if the owning GPU is a
PCIe device that is not in an XGMI hive and large-BAR is not supported,
the export fails with HSA_STATUS_ERROR_NOT_SUPPORTED.
Этот коммит содержится в:
Chris Freehill
2025-06-05 16:38:38 -05:00
коммит произвёл Freehill, Chris
родитель dce52be686
Коммит 3a9d14bb66
11 изменённых файлов: 114 добавлений и 23 удалений
+1 -1
Просмотреть файл
@@ -87,7 +87,7 @@ include(utils)
## Get version strings
get_version("1.16.0")
get_version("1.17.0")
if (${ROCM_PATCH_VERSION})
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
+6
Просмотреть файл
@@ -1246,6 +1246,12 @@ hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size
return amdExtTable->hsa_amd_portable_export_dmabuf_fn(ptr, size, dmabuf, offset);
}
// Mirrors Amd Extension Apis
// Public entry point for the flag-aware dma-buf export: dispatches through the
// AMD extension API table and returns that entry's status unchanged.
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size, int* dmabuf,
                                                       uint64_t* offset, uint64_t flags) {
  const hsa_status_t status =
      amdExtTable->hsa_amd_portable_export_dmabuf_v2_fn(ptr, size, dmabuf, offset, flags);
  return status;
}
// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_close_dmabuf(int dmabuf) {
return amdExtTable->hsa_amd_portable_close_dmabuf_fn(dmabuf);
+3
Просмотреть файл
@@ -302,6 +302,9 @@ hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size,
uint32_t* size_copied, void* dest,
bool* is_data_loss);
// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf_v2(const void* ptr,
size_t size, int* dmabuf, uint64_t* offset, uint64_t flags);
// Mirrors Amd Extension Apis
hsa_status_t HSA_API hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
uint64_t* offset);
+2 -1
Просмотреть файл
@@ -375,7 +375,8 @@ class Runtime {
hsa_status_t SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, uint32_t num_dep_signals,
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal);
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset);
hsa_status_t DmaBufExport(const void* ptr, size_t size, int* dmabuf,
uint64_t* offset, uint64_t flags);
hsa_status_t DmaBufClose(int dmabuf);
+2 -1
Просмотреть файл
@@ -87,7 +87,7 @@ void HsaApiTable::Init() {
// they can add preprocessor macros on the new functions
constexpr size_t expected_core_api_table_size = 1016;
constexpr size_t expected_amd_ext_table_size = 600;
constexpr size_t expected_amd_ext_table_size = 608;
constexpr size_t expected_image_ext_table_size = 128;
constexpr size_t expected_finalizer_ext_table_size = 64;
constexpr size_t expected_tools_table_size = 64;
@@ -476,6 +476,7 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine;
amd_ext_api.hsa_amd_portable_export_dmabuf_v2_fn = AMD::hsa_amd_portable_export_dmabuf_v2;
}
void HsaApiTable::UpdateTools() {
+16 -2
Просмотреть файл
@@ -1307,14 +1307,28 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
}
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
uint64_t* offset) {
uint64_t* offset) {
TRY;
IS_OPEN();
IS_BAD_PTR(ptr);
IS_BAD_PTR(dmabuf);
IS_BAD_PTR(offset);
IS_ZERO(size);
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf,
offset, HSA_AMD_DMABUF_MAPPING_TYPE_NONE);
CATCH;
}
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
int* dmabuf, uint64_t* offset, uint64_t flags) {
TRY;
IS_OPEN();
IS_BAD_PTR(ptr);
IS_BAD_PTR(dmabuf);
IS_BAD_PTR(offset);
IS_ZERO(size);
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size, dmabuf, offset);
return core::Runtime::runtime_singleton_->DmaBufExport(ptr, size,
dmabuf, offset, flags);
CATCH;
}
+10 -2
Просмотреть файл
@@ -3078,7 +3078,8 @@ Agent* Runtime::GetSVMPrefetchAgent(void* ptr, size_t size) {
return agents_by_node_[prefetch_node][0];
}
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, uint64_t* offset) {
hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf,
uint64_t* offset, uint64_t flags) {
#ifdef __linux__
ScopedAcquire<KernelSharedMutex::Shared> lock(memory_lock_.shared());
// Lookup containing allocation.
@@ -3093,6 +3094,14 @@ hsa_status_t Runtime::DmaBufExport(const void* ptr, size_t size, int* dmabuf, ui
if (mem->second.region->owner()->device_type() != Agent::kAmdGpuDevice)
return HSA_STATUS_ERROR_INVALID_AGENT;
rocr::AMD::GpuAgent* owner =
static_cast<AMD::GpuAgent*>(mem->second.region->owner());
if (flags & HSA_AMD_DMABUF_MAPPING_TYPE_PCIE &&
!owner->is_xgmi_cpu_gpu() &&
!owner->LargeBarEnabled()) {
return (hsa_status_t)HSA_STATUS_ERROR_NOT_SUPPORTED;
}
int fd;
uint64_t off;
HSAKMT_STATUS err = HSAKMT_CALL(hsaKmtExportDMABufHandle(const_cast<void*>(ptr), size, &fd, &off));
@@ -3319,7 +3328,6 @@ hsa_status_t Runtime::VMemoryHandleUnmap(void* va, size_t size) {
if (va_chunk != va_ptr + size) {
return HSA_STATUS_ERROR_INVALID_ALLOCATION;
}
hsa_status_t status;
for (auto mappedHandleIt : mappedHandles) {
// Remove access from all agents that were allowed access
+1
Просмотреть файл
@@ -234,6 +234,7 @@ global:
hsa_amd_spm_release;
hsa_amd_spm_set_dest_buffer;
hsa_amd_portable_export_dmabuf;
hsa_amd_portable_export_dmabuf_v2;
hsa_amd_portable_close_dmabuf;
hsa_amd_vmem_address_reserve;
hsa_amd_vmem_address_reserve_align;
+1
Просмотреть файл
@@ -270,6 +270,7 @@ struct AmdExtTable {
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn;
};
// Table to export HSA Core Runtime Apis
+1 -1
Просмотреть файл
@@ -58,7 +58,7 @@
// Step Ids of the Api tables exported by Hsa Core Runtime
#define HSA_API_TABLE_STEP_VERSION 0x01
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x06
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x07
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
+71 -15
Просмотреть файл
@@ -59,9 +59,10 @@
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
* - 1.7 - hsa_amd_signal_wait_all
* - 1.8 - hsa_amd_memory_get_preferred_copy_engine
* - 1.9 - hsa_amd_portable_export_dmabuf_v2
*/
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
#define HSA_AMD_INTERFACE_VERSION_MINOR 8
#define HSA_AMD_INTERFACE_VERSION_MINOR 9
#ifdef __cplusplus
extern "C" {
@@ -446,6 +447,11 @@ enum {
* Resource is busy or temporarily unavailable
*/
HSA_STATUS_ERROR_RESOURCE_BUSY = 46,
/**
* Request is not supported by this system
*/
HSA_STATUS_ERROR_NOT_SUPPORTED = 47,
};
/** @} */
@@ -759,6 +765,17 @@ typedef enum hsa_amd_coherency_type_s {
} hsa_amd_coherency_type_t;
/**
* @brief dmabuf attributes
*
* Mapping-type flags accepted in the @p flags bitmask of
* hsa_amd_portable_export_dmabuf_v2().
*/
#ifdef __cplusplus
typedef enum hsa_amd_dma_buf_mapping_type_s : int {
#else
typedef enum hsa_amd_dma_buf_mapping_type_s {
#endif
/**
* No mapping type requested. This is the value that
* hsa_amd_portable_export_dmabuf() passes on behalf of its callers.
*/
HSA_AMD_DMABUF_MAPPING_TYPE_NONE = 0,
/**
* The caller intends to share the dma-buf over PCIe. The export is rejected
* with ::HSA_STATUS_ERROR_NOT_SUPPORTED when the GPU that owns the allocation
* is neither XGMI-connected nor large-BAR enabled.
*/
HSA_AMD_DMABUF_MAPPING_TYPE_PCIE = 1
} hsa_amd_dma_buf_mapping_type_t;
/**
* @brief Get the coherency type of the fine grain region of an agent.
*
@@ -3138,21 +3155,10 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
*/
/**
* @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
* @brief Older version of hsa_amd_portable_export_dmabuf_v2
*
* Obtains an OS specific handle to GPU agent memory. The memory must be part
* of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
* The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
* to the allocation.
*
* Shared access to the memory is not guaranteed to be fine grain coherent even
* if the allocation exported is from a fine grain pool. The shared memory
* consistency model will be no stronger than the model exported from, consult
* the importing API to determine the final consistency model.
*
* The allocation's memory remains valid as long as the handle and any mapping
* of the handle remains valid. When the handle and all mappings are closed
* the backing memory will be released for reuse.
* This is the same as calling hsa_amd_portable_export_dmabuf_v2() with the
* flags argument set to HSA_AMD_DMABUF_MAPPING_TYPE_NONE.
*
* @param[in] ptr Pointer to the allocation being exported.
*
@@ -3185,6 +3191,56 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz
hsa_status_t hsa_amd_portable_export_dmabuf(const void* ptr, size_t size, int* dmabuf,
uint64_t* offset);
/**
* @brief Obtains an OS specific, vendor neutral, handle to a memory allocation.
*
* Obtains an OS specific handle to GPU agent memory. The memory must be part
* of a single allocation from an hsa_amd_memory_pool_t exposed by a GPU Agent.
* The handle may be used with other APIs (e.g. Vulkan) to obtain shared access
* to the allocation.
*
* Shared access to the memory is not guaranteed to be fine grain coherent even
* if the allocation exported is from a fine grain pool. The shared memory
* consistency model will be no stronger than the model exported from, consult
* the importing API to determine the final consistency model.
*
* The allocation's memory remains valid as long as the handle and any mapping
* of the handle remains valid. When the handle and all mappings are closed
* the backing memory will be released for reuse.
*
* @param[in] ptr Pointer to the allocation being exported.
*
* @param[in] size Size in bytes to export following @p ptr. The entire range
* being exported must be contained within a single allocation.
*
* @param[out] dmabuf Pointer to a dma-buf file descriptor holding a reference to the
* allocation. Contents will not be altered in the event of failure.
*
* @param[out] offset Offset in bytes into the memory referenced by the dma-buf
* object at which @p ptr resides. Contents will not be altered in the event
* of failure.
*
* @param[in] flags Bitmask of hsa_amd_dma_buf_mapping_type_t flags.
* ::HSA_AMD_DMABUF_MAPPING_TYPE_NONE requests no additional check and is
* equivalent to calling hsa_amd_portable_export_dmabuf().
* ::HSA_AMD_DMABUF_MAPPING_TYPE_PCIE states that the dma-buf will be shared
* over PCIe; the export is then refused if the owning GPU cannot support it
* (see ::HSA_STATUS_ERROR_NOT_SUPPORTED below).
*
* @retval ::HSA_STATUS_SUCCESS Export completed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
* initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT One or more arguments is NULL.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION The address range described by
* @p ptr and @p size are not contained within a single allocation.
*
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The allocation described by @p ptr
* and @p size was allocated on a device which can not export memory.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The return file descriptor,
* @p dmabuf, could not be created.
*
* @retval ::HSA_STATUS_ERROR_NOT_SUPPORTED @p flags contains
* ::HSA_AMD_DMABUF_MAPPING_TYPE_PCIE and the GPU that owns the allocation is
* neither XGMI-connected nor large-BAR enabled.
*/
hsa_status_t hsa_amd_portable_export_dmabuf_v2(const void* ptr, size_t size,
int* dmabuf, uint64_t* offset, uint64_t flags);
/**
* @brief Closes an OS specific, vendor neutral, handle to a memory allocation.
*