diff --git a/projects/rocr-runtime/libhsakmt/CMakeLists.txt b/projects/rocr-runtime/libhsakmt/CMakeLists.txt index 6849de8721..44b5dc603e 100644 --- a/projects/rocr-runtime/libhsakmt/CMakeLists.txt +++ b/projects/rocr-runtime/libhsakmt/CMakeLists.txt @@ -129,7 +129,8 @@ set ( HSAKMT_SRC "src/debug.c" "src/spm.c" "src/version.c" "src/svm.c" - "src/pc_sampling.c") + "src/pc_sampling.c" + "src/ais.c") ## Declare the library target name add_library (${HSAKMT_TARGET} STATIC "") diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h index 787e29275d..02fbbd7f80 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h @@ -1208,6 +1208,32 @@ hsaKmtPcSamplingStop( HsaPcSamplingTraceId traceId ); +/** + * Direct IO Read or write a file from/to GPU buffer + * + * Arguments: + * @MemoryAddress (IN) - Allocated buffer to read / write + * @MemorySizeInBytes (IN) - Size in bytes to read / write. 
Should be page aligned + * @fd (IN) - File descriptor of the file to be read / write + * @file_offset (IN) - Offset from beginning of the file where read/write should happen + * @AisFlags (IN) - Flag that indicates read / write operation + * + * Return: + * HSAKMT_STATUS_ERROR - failed + * HSAKMT_STATUS_SUCCESS - successfully complete + */ + +HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile( + void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAint32 fd, + HSAint64 file_offset, + HsaAisFlags AisFlags, + HSAuint64 *SizeCopiedInBytes, + HSAint32 *status +); + + /** * Check if the HSA KMT Model is enabled * diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h index fd1661f06e..9784d36373 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h @@ -1513,6 +1513,11 @@ typedef union #pragma pack(pop, hsakmttypes_h) +typedef enum _HsaAisFlags { + HSA_AIS_READ = 0x1, + HSA_AIS_WRITE= 0x2 +} HsaAisFlags; + #ifdef __cplusplus } //extern "C" diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h index 66b499d424..0f9eeb1124 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/linux/kfd_ioctl.h @@ -1626,7 +1626,7 @@ struct kfd_ioctl_pmc_settings { }; struct kfd_ioctl_profiler_args { - __u32 op; /* kfd_profiler_op */ + __u32 op; /* kfd_profiler_op */ union { struct kfd_ioctl_pc_sample_args pc_sample; struct kfd_ioctl_pmc_settings pmc; @@ -1634,6 +1634,63 @@ struct kfd_ioctl_profiler_args { }; }; +/** + * kfd_ais_ops - AIS ioctl operations + * + * @KFD_IOC_AIS_READ: Direct IO read from a file into VRAM + * @KFD_IOC_AIS_WRITE: Direct IO write into a file from VRAM + */ +enum kfd_ais_ops { + KFD_IOC_AIS_READ = 1, + KFD_IOC_AIS_WRITE = 2, +}; + +/** 
+ * kfd_ais_in_args + * + * @op (IN) - kfd_ais_ops + * @fd (IN) - file descriptor of the file to read/write + * @handle (IN) - memory handle returned by alloc. Should be mapped to + * the GPU with AMDKFD_IOC_MAP_MEMORY_TO_GPU. + * @handle_offset (IN) - offset into the allocated memory to read/write + * @file_offset (IN) - offset from the beginning of the file to read/write + * @size (IN) - size in bytes to read/write + */ + +struct kfd_ais_in_args { + __u64 handle; /* to KFD */ + __u64 handle_offset; /* to KFD */ + __s64 file_offset; /* to KFD */ + __u64 size; /* to KFD */ + __u32 op; /* to KFD */ + __s32 fd; /* to KFD */ +}; + +/** + * kfd_ais_out_args + * + * @size_copied (OUT) KFD returns number of bytes transferred + * @status (OUT) 0 for success and -ve error values if failure + */ +struct kfd_ais_out_args { + __u64 size_copied; /* from KFD */ + __s32 status; /* from KFD */ + __s32 pad; /* unused */ +}; + +/** + * Arguments for AMDKFD_IOC_AIS_OP + * AIS (AMD Infinity Storage) operations. + * See @kfd_ais_in_args and @kfd_ais_out_args + */ + +struct kfd_ioctl_ais_args { + union { + struct kfd_ais_in_args in; + struct kfd_ais_out_args out; + }; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -1776,7 +1833,10 @@ struct kfd_ioctl_profiler_args { #define AMDKFD_IOC_PROFILER \ AMDKFD_IOWR(0x86, struct kfd_ioctl_profiler_args) +#define AMDKFD_IOC_AIS_OP \ + AMDKFD_IOWR(0x87, struct kfd_ioctl_ais_args) + #define AMDKFD_COMMAND_START_2 0x80 -#define AMDKFD_COMMAND_END_2 0x87 +#define AMDKFD_COMMAND_END_2 0x88 #endif diff --git a/projects/rocr-runtime/libhsakmt/src/ais.c b/projects/rocr-runtime/libhsakmt/src/ais.c new file mode 100644 index 0000000000..aca8acc48f --- /dev/null +++ b/projects/rocr-runtime/libhsakmt/src/ais.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "libhsakmt.h" +#include "hsakmt/linux/kfd_ioctl.h" +#include +#include +#include <errno.h> +#include "fmm.h" + + +/* + * Direct IO between a file and a GPU buffer (AMDKFD_IOC_AIS_OP). + * + * MemoryAddress is resolved to its buffer handle and to the offset inside + * that buffer; a range that runs past the end of the buffer is rejected. + * AisFlags must be exactly HSA_AIS_READ or HSA_AIS_WRITE. + * + * SizeCopiedInBytes and status may be NULL. They are written on every return + * path after CHECK_KFD_OPEN(): 0 / -EINVAL on a parameter error, 0 / -errno + * when the ioctl fails, and the values reported by the kernel otherwise. + * + * Returns HSAKMT_STATUS_INVALID_PARAMETER (bad range or bad AisFlags), + * HSAKMT_STATUS_ERROR (ioctl failed) or HSAKMT_STATUS_SUCCESS. + */ +HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile(void *MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAint32 fd, + HSAint64 file_offset, + HsaAisFlags AisFlags, + HSAuint64 *SizeCopiedInBytes, + HSAint32 *status) +{ + CHECK_KFD_OPEN(); + + struct kfd_ioctl_ais_args args = {0}; + uint64_t handle, size_offset = MemorySizeInBytes; + int ret; + + /* Give the optional outputs a defined value for every early return */ + if (SizeCopiedInBytes) + *SizeCopiedInBytes = 0; + if (status) + *status = -EINVAL; + + /* NOTE(review): the original comment here said "Support is only for dGPUs", + * but nothing in this function enforces it - confirm where that is checked. */ + if (!hsakmt_fmm_get_handle(MemoryAddress, &handle, &size_offset)) { + pr_err("Address/size out of range: %p/%lu\n", MemoryAddress, MemorySizeInBytes); + return HSAKMT_STATUS_INVALID_PARAMETER; + } + + args.in.handle = handle; + args.in.fd = fd; + args.in.file_offset = file_offset; + args.in.size = MemorySizeInBytes; + if (AisFlags == HSA_AIS_WRITE) + args.in.op = KFD_IOC_AIS_WRITE; + else if (AisFlags == HSA_AIS_READ) + args.in.op = KFD_IOC_AIS_READ; + else { + pr_err("Invalid AisFlags: %d\n", AisFlags); + return HSAKMT_STATUS_INVALID_PARAMETER; + } + + /* hsakmt_fmm_get_handle() replaced size_offset with the offset of + * MemoryAddress from the base of the buffer */ + args.in.handle_offset = size_offset; + ret = hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_AIS_OP, &args); + if (ret < 0) { + /* args.in and args.out share storage: after a failed ioctl the out + * view may still hold the request (handle / handle_offset), so + * report errno instead of reading it back. Assumes hsakmt_ioctl() + * leaves errno from the failed ioctl intact - TODO confirm. */ + int err = errno; + + if (status) + *status = err ? -err : -EIO; + return HSAKMT_STATUS_ERROR; + } + + if (SizeCopiedInBytes) + *SizeCopiedInBytes = args.out.size_copied; + if (status) + *status = args.out.status; + + return HSAKMT_STATUS_SUCCESS; 
+} diff --git a/projects/rocr-runtime/libhsakmt/src/events.c b/projects/rocr-runtime/libhsakmt/src/events.c index 9ab7818a4d..2421a80dfd 100644 --- a/projects/rocr-runtime/libhsakmt/src/events.c +++ b/projects/rocr-runtime/libhsakmt/src/events.c @@ -86,7 +86,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, if (hsakmt_use_model) model_set_event_page(events_page, KFD_SIGNAL_EVENT_LIMIT); else - hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset); + hsakmt_fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset, NULL); } if (hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) { diff --git a/projects/rocr-runtime/libhsakmt/src/fmm.c b/projects/rocr-runtime/libhsakmt/src/fmm.c index 8f3cc65a5d..ca6062fbd2 100644 --- a/projects/rocr-runtime/libhsakmt/src/fmm.c +++ b/projects/rocr-runtime/libhsakmt/src/fmm.c @@ -3669,7 +3669,17 @@ int hsakmt_fmm_unmap_from_gpu(void *address) return ret; } -bool hsakmt_fmm_get_handle(void *address, uint64_t *handle) +/* + * Get memory @handle [OUT] for a given @address [IN] + * @size_offset [IN/OUT] If specified, then address can in fact be a range. + * And size_offset [IN] is provided to validate that [offset of address] + + * @size_offset [IN] is within the range of the object. If within range, + * then @size_offset [OUT] is set to the offset of the address from the + * base of the object. + * + * Returns true if the handle is found, false otherwise. 
+ */ +bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset) { uint32_t i; manageable_aperture_t *aperture; @@ -3706,10 +3716,25 @@ bool hsakmt_fmm_get_handle(void *address, uint64_t *handle) pthread_mutex_lock(&aperture->fmm_mutex); /* Find the object to retrieve the handle */ - object = vm_find_object_by_address(aperture, address, 0); + if (!size_offset) + object = vm_find_object_by_address(aperture, address, 0); + else + object = vm_find_object_by_address_range(aperture, address); if (object && handle) { *handle = object->handles[0]; found = true; + if (size_offset) { + /* If size_offset is set, then validate if address + size + * is within range. If within range then return offset + * of the address from base */ + HSAuint64 offset = VOID_PTRS_SUB(address, object->start); + + if (offset + *size_offset > object->size) + found = false; + else + *size_offset = offset; + + } } pthread_mutex_unlock(&aperture->fmm_mutex); diff --git a/projects/rocr-runtime/libhsakmt/src/fmm.h b/projects/rocr-runtime/libhsakmt/src/fmm.h index cdecfa93d8..9cb3a8c220 100644 --- a/projects/rocr-runtime/libhsakmt/src/fmm.h +++ b/projects/rocr-runtime/libhsakmt/src/fmm.h @@ -60,7 +60,7 @@ void hsakmt_fmm_print(uint32_t node); HSAKMT_STATUS hsakmt_fmm_release(void *address); HSAKMT_STATUS hsakmt_fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address); int hsakmt_fmm_unmap_from_gpu(void *address); -bool hsakmt_fmm_get_handle(void *address, uint64_t *handle); +bool hsakmt_fmm_get_handle(void *address, uint64_t *handle, uint64_t *size_offset); HSAKMT_STATUS hsakmt_fmm_get_mem_info(const void *address, HsaPointerInfo *info); HSAKMT_STATUS hsakmt_fmm_set_mem_user_data(const void *mem, void *usr_data); #ifdef SANITIZER_AMDGPU diff --git a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver index fa2a427a45..357c7b04d9 100644 --- a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver +++ 
b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver @@ -89,6 +89,7 @@ hsaKmtPcSamplingDestroy; hsaKmtPcSamplingStart; hsaKmtPcSamplingStop; hsaKmtPcSamplingSupport; +hsaKmtAisReadWriteFile; local: *; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 656f601edd..e65d7302af 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1336,6 +1336,20 @@ hsa_status_t HSA_API hsa_amd_enable_logging(uint8_t* flags, void* file) { return amdExtTable->hsa_amd_enable_logging_fn(flags, file); } +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + return amdExtTable->hsa_amd_ais_file_write_fn(handle, devicePtr, size, file_offset, + size_copied, status); +} + +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + return amdExtTable->hsa_amd_ais_file_read_fn(handle, devicePtr, size, file_offset, + size_copied, status); +} + // Tools only table interfaces. 
namespace rocr { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 4d5fe0162a..e4e7c58499 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -371,6 +371,16 @@ hsa_status_t HSA_API hsa_amd_agent_set_async_scratch_limit(hsa_agent_t agent, si hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute, void* value); +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + // Mirrors Amd Extension Apis hsa_status_t HSA_API hsa_amd_enable_logging(uint8_t* flags, void* file); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h index f5feaa678b..3c0c5ee4f5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h @@ -324,6 +324,13 @@ class ThunkLoader { typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtPcSamplingSupport))(void); typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtModelEnabled))(bool* enable); typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtQueueRingDoorbell))(HSA_QUEUEID QueueId); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtAisReadWriteFile))(void *devicePtr, \ + HSAuint64 size, \ + HSAint32 fd, \ + HSAint64 file_offset, \ + HsaAisFlags flags, \ + HSAuint64 *SizeCopiedInBytes, \ + HSAint32 *status); /* drm API */ typedef int (DRM_DEF(amdgpu_device_initialize))(int fd, \ @@ -465,6 +472,7 @@ class 
ThunkLoader { HSAKMT_DEF(hsaKmtPcSamplingSupport)* HSAKMT_PFN(hsaKmtPcSamplingSupport); HSAKMT_DEF(hsaKmtModelEnabled)* HSAKMT_PFN(hsaKmtModelEnabled); HSAKMT_DEF(hsaKmtQueueRingDoorbell)* HSAKMT_PFN(hsaKmtQueueRingDoorbell); + HSAKMT_DEF(hsaKmtAisReadWriteFile)* HSAKMT_PFN(hsaKmtAisReadWriteFile); DRM_DEF(amdgpu_device_initialize)* DRM_PFN(amdgpu_device_initialize); DRM_DEF(amdgpu_device_deinitialize)* DRM_PFN(amdgpu_device_deinitialize); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index 8314f8b0ed..de2de29746 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -87,7 +87,7 @@ void HsaApiTable::Init() { // they can add preprocessor macros on the new functions constexpr size_t expected_core_api_table_size = 1016; - constexpr size_t expected_amd_ext_table_size = 608; + constexpr size_t expected_amd_ext_table_size = 624; constexpr size_t expected_image_ext_table_size = 128; constexpr size_t expected_finalizer_ext_table_size = 64; constexpr size_t expected_tools_table_size = 64; @@ -474,6 +474,8 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_agent_set_async_scratch_limit_fn = AMD::hsa_amd_agent_set_async_scratch_limit; amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info; amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging; + amd_ext_api.hsa_amd_ais_file_write_fn = AMD::hsa_amd_ais_file_write; + amd_ext_api.hsa_amd_ais_file_read_fn = AMD::hsa_amd_ais_file_read; amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all; amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine; amd_ext_api.hsa_amd_portable_export_dmabuf_v2_fn = AMD::hsa_amd_portable_export_dmabuf_v2; diff --git 
a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index bc29100229..b311fe9861 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -1537,6 +1537,47 @@ hsa_status_t HSA_API hsa_amd_queue_get_info(hsa_queue_t* _queue, CATCH; } +// Writes size bytes from the device buffer at devicePtr to the file handle.fd at file_offset. +hsa_status_t hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + TRY; + IS_OPEN(); + + if (devicePtr == nullptr || size == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Thunk call; it returns INVALID_PARAMETER when devicePtr/size is not inside one allocation + HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtAisReadWriteFile)(devicePtr, size, handle.fd, + file_offset, HSA_AIS_WRITE, + size_copied, status); + + if (ret == HSAKMT_STATUS_SUCCESS) return HSA_STATUS_SUCCESS; + return (ret == HSAKMT_STATUS_INVALID_PARAMETER) ? HSA_STATUS_ERROR_INVALID_ALLOCATION : HSA_STATUS_ERROR; + CATCH; +} + +// Reads size bytes from the file handle.fd at file_offset into the device buffer at devicePtr. +hsa_status_t hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status) { + TRY; + IS_OPEN(); + + if (devicePtr == nullptr || size == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Thunk call; it returns INVALID_PARAMETER when devicePtr/size is not inside one allocation + HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtAisReadWriteFile)(devicePtr, size, handle.fd, + file_offset, HSA_AIS_READ, + size_copied, status); + + if (ret == HSAKMT_STATUS_SUCCESS) return HSA_STATUS_SUCCESS; 
+ return (ret == HSAKMT_STATUS_INVALID_PARAMETER) ? HSA_STATUS_ERROR_INVALID_ALLOCATION : HSA_STATUS_ERROR; + CATCH; +} + hsa_status_t hsa_amd_enable_logging(uint8_t* flags, void *file) { TRY; return core::Runtime::runtime_singleton_->EnableLogging(flags, file); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp index e479e2ccf3..5afdd14195 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp @@ -387,6 +387,9 @@ namespace core { DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)dlsym(thunk_handle, "amdgpu_device_initialize"); if (DRM_PFN(amdgpu_device_initialize) == NULL) goto ERROR; + HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)dlsym(thunk_handle, "hsaKmtAisReadWriteFile"); + if (HSAKMT_PFN(hsaKmtAisReadWriteFile) == NULL) goto ERROR; + DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)dlsym(thunk_handle, "amdgpu_device_deinitialize"); if (DRM_PFN(amdgpu_device_deinitialize) == NULL) goto ERROR; @@ -511,6 +514,7 @@ ERROR: HSAKMT_PFN(hsaKmtQueueRingDoorbell) = (HSAKMT_DEF(hsaKmtQueueRingDoorbell)*)(&hsaKmtQueueRingDoorbell); #endif HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)(&hsaKmtModelEnabled); + HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)(&hsaKmtAisReadWriteFile); DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)(&amdgpu_device_initialize); DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)(&amdgpu_device_deinitialize); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def index 3290552329..7be1b87e35 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def @@ -260,6 +260,8 @@ global: 
hsa_amd_enable_logging; hsa_amd_signal_wait_all; hsa_amd_portable_export_dmabuf_v2; + hsa_amd_ais_file_write; + hsa_amd_ais_file_read; local: *; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h index 6515b19700..cc33320269 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -271,6 +271,8 @@ struct AmdExtTable { decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn; decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn; decltype(hsa_amd_portable_export_dmabuf_v2)* hsa_amd_portable_export_dmabuf_v2_fn; + decltype(hsa_amd_ais_file_write)* hsa_amd_ais_file_write_fn; + decltype(hsa_amd_ais_file_read)* hsa_amd_ais_file_read_fn; }; // Table to export HSA Core Runtime Apis diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h index befd1e26e3..6cf1054823 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_api_trace_version.h @@ -58,7 +58,7 @@ // Step Ids of the Api tables exported by Hsa Core Runtime #define HSA_API_TABLE_STEP_VERSION 0x01 #define HSA_CORE_API_TABLE_STEP_VERSION 0x00 -#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x07 +#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x08 #define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00 #define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01 // Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index a0d9cd2005..3fd1f9348e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -64,9 +64,10 @@ * - 
1.11 - hsa_amd_agent_info_t: HSA_AMD_AGENT_INFO_CLOCK_COUNTERS * - 1.12 - hsa_amd_pointer_info: HSA_EXT_POINTER_TYPE_HSA_VMEM and HSA_EXT_POINTER_TYPE_RESERVED_ADDR * - 1.13 - hsa_amd_pointer_info: Added new registered field to hsa_amd_pointer_info_t + * - 1.14 - hsa_amd_ais_file_write, hsa_amd_ais_file_read */ #define HSA_AMD_INTERFACE_VERSION_MAJOR 1 -#define HSA_AMD_INTERFACE_VERSION_MINOR 13 +#define HSA_AMD_INTERFACE_VERSION_MINOR 14 #ifdef __cplusplus extern "C" { @@ -3654,6 +3655,95 @@ typedef enum { hsa_status_t hsa_amd_queue_get_info(hsa_queue_t* queue, hsa_queue_info_attribute_t attribute, void* value); +typedef struct hsa_amd_ais_file_handle_s { + /* + * file handle for AIS read & write. Linux will use fd. + * pad keeps the size consistent across different platforms. + */ + union { + void* handle; + int fd; + uint8_t pad[8]; + }; +} hsa_amd_ais_file_handle_t; + +/** + * @brief Write data from device memory to a file + * + * Writes data from device memory buffer to a file at the specified offset. + * The device memory pointer must be accessible from the host and point to + * a valid allocation. + * + * EXPERIMENTAL: AIS read and write calls are currently in experimental phase and + * APIs may be modified + * + * @param[in] handle Handle of the file to write to. + * + * @param[in] devicePtr Device memory buffer pointer containing data to write. + * + * @param[in] size Size in bytes of the data to write. + * + * @param[in] file_offset Offset in bytes into the file where data will be written. + * + * @param[out] size_copied Actual number of bytes copied + * + * @param[out] status Additional status if any + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is invalid, @p devicePtr + * is NULL, or @p size is 0. 
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p devicePtr does not refer to + * a valid allocation. + * + * @retval ::HSA_STATUS_ERROR An error occurred during the write operation. + */ +hsa_status_t HSA_API hsa_amd_ais_file_write(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + +/** + * @brief Read data from a file to device memory + * + * Reads data from a file at the specified offset into a device memory buffer. + * The device memory pointer must be accessible from the host and point to + * a valid allocation. + * + * EXPERIMENTAL: AIS read and write calls are currently in experimental phase and + * APIs may be modified + * @param[in] handle Handle of the file to read from. + * + * @param[in] devicePtr Device memory buffer pointer to store the read data. + * + * @param[in] size Size in bytes of the data to read. + * + * @param[in] file_offset Offset in bytes into the file where data will be read from. + * + * @param[out] size_copied Actual number of bytes copied + * + * @param[out] status Additional status if any + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been + * initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p handle is invalid, @p devicePtr + * is NULL, or @p size is 0. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ALLOCATION @p devicePtr does not refer to + * a valid allocation. + * + * @retval ::HSA_STATUS_ERROR An error occurred during the read operation. + */ +hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, void *devicePtr, + uint64_t size, int64_t file_offset, + uint64_t *size_copied, int32_t *status); + /** * @brief logging types */