From 5e7210980ea8089dce81fd814448a204fab64568 Mon Sep 17 00:00:00 2001 From: hkasivis <58193769+hkasivis@users.noreply.github.com> Date: Sat, 20 Sep 2025 11:30:05 -0400 Subject: [PATCH] Users/hkasivis/add ais support v2.1 (#928) * libhsakmt: Update hsakmt_fmm_get_handle to support address range Currently, hsakmt_fmm_get_handle works only if the address is the allocated (starting) value. Update it so it can find the handle if address falls in the valid allocated range. This is useful for AMD Infinity Storage feature where data needs to be transferred to any memory within the allocated range Signed-off-by: Harish Kasiviswanathan * libhsakmt: Introduce AMD Infinity Storage (AIS) API Add hsaKmtAisReadWriteFile() API to support AMD Infinity Storage. The API moves data directly between GPU VRAM and a file. v2: Add in/out ioctl arguments to provide more status information to user space. Modify hsaKmt API also accordingly. Signed-off-by: Harish Kasiviswanathan * rocr: Initial implementation of AMD Infinity Storage (AIS) Implement first two API: hsa_amd_ais_file_write and hsa_amd_ais_file_read v2: Change API from hsa_amd_ to hsa_amd_ais_ Change API to take in handle instead of fd for compatibility across different platforms Original Author: Chris Freehill Signed-off-by: Harish Kasiviswanathan --------- Signed-off-by: Harish Kasiviswanathan --- .../rocr-runtime/libhsakmt/CMakeLists.txt | 3 +- .../libhsakmt/include/hsakmt/hsakmt.h | 26 ++++++ .../libhsakmt/include/hsakmt/hsakmttypes.h | 5 + .../include/hsakmt/linux/kfd_ioctl.h | 64 ++++++++++++- projects/rocr-runtime/libhsakmt/src/ais.c | 77 ++++++++++++++++ projects/rocr-runtime/libhsakmt/src/events.c | 2 +- projects/rocr-runtime/libhsakmt/src/fmm.c | 29 +++++- projects/rocr-runtime/libhsakmt/src/fmm.h | 2 +- .../rocr-runtime/libhsakmt/src/libhsakmt.ver | 1 + .../core/common/hsa_table_interface.cpp | 14 +++ .../hsa-runtime/core/inc/hsa_ext_amd_impl.h | 10 ++ .../hsa-runtime/core/inc/thunk_loader.h | 8 ++ 
.../core/runtime/hsa_api_trace.cpp | 4 +- .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 39 ++++++++ .../hsa-runtime/core/runtime/thunk_loader.cpp | 4 + .../runtime/hsa-runtime/hsacore.so.def | 2 + .../runtime/hsa-runtime/inc/hsa_api_trace.h | 2 + .../hsa-runtime/inc/hsa_api_trace_version.h | 2 +- .../runtime/hsa-runtime/inc/hsa_ext_amd.h | 92 ++++++++++++++++++- 19 files changed, 376 insertions(+), 10 deletions(-) create mode 100644 projects/rocr-runtime/libhsakmt/src/ais.c diff --git a/projects/rocr-runtime/libhsakmt/CMakeLists.txt b/projects/rocr-runtime/libhsakmt/CMakeLists.txt index 6849de8721..44b5dc603e 100644 --- a/projects/rocr-runtime/libhsakmt/CMakeLists.txt +++ b/projects/rocr-runtime/libhsakmt/CMakeLists.txt @@ -129,7 +129,8 @@ set ( HSAKMT_SRC "src/debug.c" "src/spm.c" "src/version.c" "src/svm.c" - "src/pc_sampling.c") + "src/pc_sampling.c" + "src/ais.c") ## Declare the library target name add_library (${HSAKMT_TARGET} STATIC "") diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h index 787e29275d..02fbbd7f80 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h @@ -1208,6 +1208,32 @@ hsaKmtPcSamplingStop( HsaPcSamplingTraceId traceId ); +/** + * Direct IO Read or write a file from/to GPU buffer + * + * Arguments: + * @MemoryAddress (IN) - Allocated buffer to read / write + * @MemorySizeInBytes (IN) - Size in bytes to read / write. 
Should be page aligned + * @fd (IN) - File descriptor of the file to be read / write + * @file_offset (IN) - Offset from beginning of the file where read/write should happen + * @AisFlags (IN) - Flag that indicates read / write operation + * + * Return: + * HSAKMT_STATUS_ERROR - failed + * HSAKMT_STATUS_SUCCESS - successfully complete + */ + +HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile( + void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAint32 fd, + HSAint64 file_offset, + HsaAisFlags AisFlags, + HSAuint64 *SizeCopiedInBytes, + HSAint32 *status +); + + /** * Check if the HSA KMT Model is enabled * diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h index fd1661f06e..9784d36373 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h @@ -1513,6 +1513,11 @@ typedef union #pragma pack(pop, hsakmttypes_h) +typedef enum _HsaAisFlags { + HSA_AIS_READ = 0x1, + HSA_AIS_WRITE= 0x2 +} HsaAisFlags; + #ifdef __cplusplus } //extern "C" diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h index 66b499d424..0f9eeb1124 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h @@ -1626,7 +1626,7 @@ struct kfd_ioctl_pmc_settings { }; struct kfd_ioctl_profiler_args { - __u32 op; /* kfd_profiler_op */ + __u32 op; /* kfd_profiler_op */ union { struct kfd_ioctl_pc_sample_args pc_sample; struct kfd_ioctl_pmc_settings pmc; @@ -1634,6 +1634,63 @@ struct kfd_ioctl_profiler_args { }; }; +/** + * kfd_ais_ops - AIS ioctl operations + * + * @KFD_IOC_AIS_READ: Direct IO read from a file into VRAM + * @KFD_IOC_AIS_WRITE: Direct IO write into a file from VRAM + */ +enum kfd_ais_ops { + KFD_IOC_AIS_READ = 1, + KFD_IOC_AIS_WRITE = 2, +}; + +/** 
+ * kfd_ais_in_args + * + * @op (IN) - kfd_ais_ops + * @fd (IN) - file descriptor of the file to read/write + * @handle (IN) - memory handle returned by alloc. Should be mapped to + * the GPU with AMDKFD_IOC_MAP_MEMORY_TO_GPU. + * @handle_offset (IN) - offset into the allocated memory to read/write + * @file_offset (IN) - offset from the beginning of the file to read/write + * @size (IN) - size in bytes to read/write + */ + +struct kfd_ais_in_args { + __u64 handle; /* to KFD */ + __u64 handle_offset; /* to KFD */ + __s64 file_offset; /* to KFD */ + __u64 size; /* to KFD */ + __u32 op; /* to KFD */ + __s32 fd; /* to KFD */ +}; + +/** + * kfd_ais_out_args + * + * @size_copied (OUT) KFD returns number of bytes transferred + * @status (OUT) 0 for success and -ve error values if failure + */ +struct kfd_ais_out_args { + __u64 size_copied; /* from KFD */ + __s32 status; /* from KFD */ + __s32 pad; /* unused */ +}; + +/** + * Arguments for AMDKFD_IOC_AIS_OP + * AIS (AMD Infinity Storage) operations. + * See @kfd_ais_in_args and @kfd_ais_out_args + */ + +struct kfd_ioctl_ais_args { + union { + struct kfd_ais_in_args in; + struct kfd_ais_out_args out; + }; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -1776,7 +1833,10 @@ struct kfd_ioctl_profiler_args { #define AMDKFD_IOC_PROFILER \ AMDKFD_IOWR(0x86, struct kfd_ioctl_profiler_args) +#define AMDKFD_IOC_AIS_OP \ + AMDKFD_IOWR(0x87, struct kfd_ioctl_ais_args) + #define AMDKFD_COMMAND_START_2 0x80 -#define AMDKFD_COMMAND_END_2 0x87 +#define AMDKFD_COMMAND_END_2 0x88 #endif diff --git a/projects/rocr-runtime/libhsakmt/src/ais.c b/projects/rocr-runtime/libhsakmt/src/ais.c new file mode 100644 index 0000000000..aca8acc48f --- /dev/null +++ b/projects/rocr-runtime/libhsakmt/src/ais.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "libhsakmt.h" +#include "hsakmt/linux/kfd_ioctl.h" +#include +#include +#include "fmm.h" + + +HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAint32 fd, + HSAint64 file_offset, + HsaAisFlags AisFlags, + HSAuint64 *SizeCopiedInBytes, + HSAint32 *status) +{ + CHECK_KFD_OPEN(); + + struct kfd_ioctl_ais_args args = {0}; + uint64_t handle, size_offset = MemorySizeInBytes; + int ret; + + /* Support is only for dGPUs */ + + + if (!hsakmt_fmm_get_handle(MemoryAddress, &handle, &size_offset)) { + pr_err("Address/size out of range: %p/%lu\n", MemoryAddress, MemorySizeInBytes); + return HSAKMT_STATUS_INVALID_PARAMETER; + } + + args.in.handle = handle; + args.in.fd = fd; + args.in.file_offset = file_offset; + args.in.size = MemorySizeInBytes; + if (AisFlags == HSA_AIS_WRITE) + args.in.op = KFD_IOC_AIS_WRITE; + else if (AisFlags == HSA_AIS_READ) + args.in.op = KFD_IOC_AIS_READ; + else { + pr_err("Invalid AisFlags: %d\n", AisFlags); + return HSAKMT_STATUS_INVALID_PARAMETER; + } + + args.in.handle_offset = size_offset; + ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AIS_OP, &args); + + if (SizeCopiedInBytes) + *SizeCopiedInBytes = args.out.size_copied; + if (status) + *status = args.out.status; + + return (ret < 0) ? 
HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS; +} diff --git a/projects/rocr-runtime/libhsakmt/src/events.c b/projects/rocr-runtime/libhsakmt/src/events.c index 9ab7818a4d..2421a80dfd 100644 --- a/projects/rocr-runtime/libhsakmt/src/events.c +++ b/projects/rocr-runtime/libhsakmt/src/events.c @@ -86,7 +86,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, if (hsakmt_use_model) model_set_event_page(events_page, KFD_SIGNAL_EVENT_LIMIT); else - hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset); + hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset, NULL); } if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) { diff --git a/projects/rocr-runtime/libhsakmt/src/fmm.c b/projects/rocr-runtime/libhsakmt/src/fmm.c index 8f3cc65a5d..ca6062fbd2 100644 --- a/projects/rocr-runtime/libhsakmt/src/fmm.c +++ b/projects/rocr-runtime/libhsakmt/src/fmm.c @@ -3669,7 +3669,17 @@ int hsakmt_fmm_unmap_from_gpu(void *address) return ret; } -bool hsakmt_fmm_get_handle(void *address, uint64_t *handle) +/* + * Get memory @handle [OUT] for a given @address [IN] + * @size_offset [IN/OUT] If specified, then address can in fact be a range. + * And size_offset [IN] is provided to validate that [offset of address] + + * @size_offset [IN] is within the range of the object. If within range, + * then @size_offset [OUT] is set to the offset of the address from the + * base of the object. + * + * Returns true if the handle is found, false otherwise. 
+ */ +bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset) { uint32_t i; manageable_aperture_t *aperture; @@ -3706,10 +3716,25 @@ bool hsakmt_fmm_get_handle(void *address, uint64_t *handle) pthread_mutex_lock(&aperture->fmm_mutex); /* Find the object to retrieve the handle */ - object = vm_find_object_by_address(aperture, address, 0); + if (!size_offset) + object = vm_find_object_by_address(aperture, address, 0); + else + object = vm_find_object_by_address_range(aperture, address); if (object && handle) { *handle = object->handles[0]; found = true; + if (size_offset) { + /* If size_offset is set, then validate if address + size + * is within range. If within range then return offset + * of the address from base */ + HSAuint64 offset = VOID_PTRS_SUB(address, object->start); + + if (offset + *size_offset > object->size) + found = false; + else + *size_offset = offset; + + } } pthread_mutex_unlock(&aperture->fmm_mutex); diff --git a/projects/rocr-runtime/libhsakmt/src/fmm.h b/projects/rocr-runtime/libhsakmt/src/fmm.h index cdecfa93d8..9cb3a8c220 100644 --- a/projects/rocr-runtime/libhsakmt/src/fmm.h +++ b/projects/rocr-runtime/libhsakmt/src/fmm.h @@ -60,7 +60,7 @@ void hsakmt_fmm_print(uint32_t node); HSAKMT_STATUS hsakmt_fmm_release(void *address); HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address); int hsakmt_fmm_unmap_from_gpu(void *address); -bool hsakmt_fmm_get_handle(void *address, uint64_t *handle); +bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset); HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info); HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data); #ifdef SANITIZER_AMDGPU diff --git a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver index fa2a427a45..357c7b04d9 100644 --- a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver +++ 
b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver @@ -89,6 +89,7 @@ hsaKmtPcSamplingDestroy; hsaKmtPcSamplingStart; hsaKmtPcSamplingStop; hsaKmtPcSamplingSupport; +hsaKmtAisReadWriteFile; local: *; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 656f601edd..e65d7302af 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1336,6 +1336,20 @@ hsa_status_t HSA_API hsa_amd_enable_logging(uint8_t* flags, void* file) { return amdExtTable->hsa_amd_enable_logging_fn(flags, file); } +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + return amdExtTable->hsa_amd_ais_file_write_fn(handle, devicePtr, size, file_offset, + size_copied, status); +} + +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + return amdExtTable->hsa_amd_ais_file_read_fn(handle, devicePtr, size, file_offset, + size_copied, status); +} + // Tools only table interfaces. 
namespace rocr { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 4d5fe0162a..e4e7c58499 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -371,6 +371,16 @@ hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, si hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute, void* value); +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + // Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_enable_logging(uint8_t* flags, void* file); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h index f5feaa678b..3c0c5ee4f5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h @@ -324,6 +324,13 @@ class ThunkLoader { typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingSupport))(void); typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtModelEnabled))(bool* enable); typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtQueueRingDoorbell))(HSA_QUEUEID QueueId); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAisReadWriteFile))(void *devicePtr, \ + HSAuint64 size, \ + HSAint32 fd, \ + HSAint64 file_offset, \ + HsaAisFlags flags, \ + HSAuint64 *SizeCopiedInBytes, \ + HSAint32 *status); /* drm API */ typedef int (DRM_DEF(amdgpu_device_initialize))(int fd, \ @@ -465,6 +472,7 @@ class 
ThunkLoader { HSAKMT_DEF(hsaKmtPcSamplingSupport)* HSAKMT_PFN(hsaKmtPcSamplingSupport); HSAKMT_DEF(hsaKmtModelEnabled)* HSAKMT_PFN(hsaKmtModelEnabled); HSAKMT_DEF(hsaKmtQueueRingDoorbell)* HSAKMT_PFN(hsaKmtQueueRingDoorbell); + HSAKMT_DEF(hsaKmtAisReadWriteFile)* HSAKMT_PFN(hsaKmtAisReadWriteFile); DRM_DEF(amdgpu_device_initialize)* DRM_PFN(amdgpu_device_initialize); DRM_DEF(amdgpu_device_deinitialize)* DRM_PFN(amdgpu_device_deinitialize); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index 8314f8b0ed..de2de29746 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -87,7 +87,7 @@ void HsaApiTable::Init() { // they can add preprocessor macros on the new functions constexpr size_t expected_core_api_table_size = 1016; - constexpr size_t expected_amd_ext_table_size = 608; + constexpr size_t expected_amd_ext_table_size = 624; constexpr size_t expected_image_ext_table_size = 128; constexpr size_t expected_finalizer_ext_table_size = 64; constexpr size_t expected_tools_table_size = 64; @@ -474,6 +474,8 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_agent_set_async_scratch_limit_fn = AMD::hsa_amd_agent_set_async_scratch_limit; amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info; amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging; + amd_ext_api.hsa_amd_ais_file_write_fn = AMD::hsa_amd_ais_file_write; + amd_ext_api.hsa_amd_ais_file_read_fn = AMD::hsa_amd_ais_file_read; amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all; amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine; amd_ext_api.hsa_amd_portable_export_dmabuf_v2_fn = AMD::hsa_amd_portable_export_dmabuf_v2; diff --git 
a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index bc29100229..b311fe9861 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -1537,6 +1537,45 @@ hsa_status_t HSA_API hsa_amd_queue_get_info(hsa_queue_t* _queue, CATCH; } +hsa_status_t hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + TRY; + IS_OPEN(); + + if (devicePtr == nullptr || size == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Call the kernel module function through the thunk layer + HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtAisReadWriteFile)(devicePtr, size, handle.fd, + file_offset, HSA_AIS_WRITE, + size_copied, status); + + return (ret == HSAKMT_STATUS_SUCCESS) ? + HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + CATCH; +} + +hsa_status_t hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + TRY; + IS_OPEN(); + + if (devicePtr == nullptr || size == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Call the kernel module function through the thunk layer + HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtAisReadWriteFile)(devicePtr, size, handle.fd, + file_offset, HSA_AIS_READ, + size_copied, status); + + return (ret == HSAKMT_STATUS_SUCCESS) ? 
HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + CATCH; +} + hsa_status_t hsa_amd_enable_logging(uint8_t* flags, void *file) { TRY; return core::Runtime::runtime_singleton_->EnableLogging(flags, file); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp index e479e2ccf3..5afdd14195 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp @@ -387,6 +387,9 @@ namespace core { DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)dlsym(thunk_handle, "amdgpu_device_initialize"); if (DRM_PFN(amdgpu_device_initialize) == NULL) goto ERROR; + HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)dlsym(thunk_handle, "hsaKmtAisReadWriteFile"); + if (HSAKMT_PFN(hsaKmtAisReadWriteFile) == NULL) goto ERROR; + DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)dlsym(thunk_handle, "amdgpu_device_deinitialize"); if (DRM_PFN(amdgpu_device_deinitialize) == NULL) goto ERROR; @@ -511,6 +514,7 @@ ERROR: HSAKMT_PFN(hsaKmtQueueRingDoorbell) = (HSAKMT_DEF(hsaKmtQueueRingDoorbell)*)(&hsaKmtQueueRingDoorbell); #endif HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)(&hsaKmtModelEnabled); + HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)(&hsaKmtAisReadWriteFile); DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)(&amdgpu_device_initialize); DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)(&amdgpu_device_deinitialize); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def index 3290552329..7be1b87e35 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def @@ -260,6 +260,8 @@ global: 
hsa_amd_enable_logging; hsa_amd_signal_wait_all; hsa_amd_portable_export_dmabuf_v2; + hsa_amd_ais_file_write; + hsa_amd_ais_file_read; local: *; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h index 6515b19700..cc33320269 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -271,6 +271,8 @@ struct AmdExtTable { decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn; decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn; decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn; + decltype(hsa_amd_ais_file_write)* hsa_amd_ais_file_write_fn; + decltype(hsa_amd_ais_file_read)* hsa_amd_ais_file_read_fn; }; // Table to export HSA Core Runtime Apis diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h index befd1e26e3..6cf1054823 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h @@ -58,7 +58,7 @@ // Step Ids of the Api tables exported by Hsa Core Runtime #define HSA_API_TABLE_STEP_VERSION 0x01 #define HSA_CORE_API_TABLE_STEP_VERSION 0x00 -#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x07 +#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x08 #define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00 #define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01 // Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index a0d9cd2005..3fd1f9348e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -64,9 +64,10 @@ * - 
1.11 - hsa_amd_agent_info_t: HSA_AMD_AGENT_INFO_CLOCK_COUNTERS * - 1.12 - hsa_amd_pointer_info: HSA_EXT_POINTER_TYPE_HSA_VMEM and HSA_EXT_POINTER_TYPE_RESERVED_ADDR * - 1.13 - hsa_amd_pointer_info: Added new registered field to hsa_amd_pointer_info_t + * - 1.14 - hsa_amd_ais_file_write, hsa_amd_ais_file_read */ #define HSA_AMD_INTERFACE_VERSION_MAJOR 1 -#define HSA_AMD_INTERFACE_VERSION_MINOR 13 +#define HSA_AMD_INTERFACE_VERSION_MINOR 14 #ifdef __cplusplus extern "C" { @@ -3654,6 +3655,95 @@ typedef enum { hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute, void* value); +typedef struct hsa_amd_ais_file_handle_s { + /* + * file handle for AIS read & write. Linux will use fd. + * pad keeps the size consistent across different platforms. + */ + union { + void* handle; + int fd; + uint8_t pad[8]; + }; +} hsa_amd_ais_file_handle_t; + +/** + * @brief Write data from device memory to a file + * + * Writes data from device memory buffer to a file at the specified offset. + * The device memory pointer must be accessible from the host and point to + * a valid allocation. + * + * EXPERIMENTAL: AIS read and write calls are currently in experimental phase and + * APIs may be modified + * + * @param[in] handle Handle of the file to write to. + * + * @param[in] devicePtr Device memory buffer pointer containing data to write. + * + * @param[in] size Size in bytes of the data to write. + * + * @param[in] file_offset Offset in bytes into the file where data will be written. + * + * @param[out] size_copied Actual number of bytes copied + * + * @param[out] status Additional status if any + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is invalid, @p devicePtr + * is NULL, or @p size is 0. 
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p devicePtr does not refer to + * a valid allocation. + * + * @retval ::HSA_STATUS_ERROR An error occurred during the write operation. + */ +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + +/** + * @brief Read data from a file to device memory + * + * Reads data from a file at the specified offset into a device memory buffer. + * The device memory pointer must be accessible from the host and point to + * a valid allocation. + * + * EXPERIMENTAL: AIS read and write calls are currently in experimental phase and + * APIs may be modified + * @param[in] handle Handle of the file to read from. + * + * @param[in] devicePtr Device memory buffer pointer to store the read data. + * + * @param[in] size Size in bytes of the data to read. + * + * @param[in] file_offset Offset in bytes into the file where data will be read from. + * + * @param[out] size_copied Actual number of bytes copied + * + * @param[out] status Additional status if any + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is invalid, @p devicePtr + * is NULL, or @p size is 0. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p devicePtr does not refer to + * a valid allocation. + * + * @retval ::HSA_STATUS_ERROR An error occurred during the read operation. + */ +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + /** * @brief logging types */