diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h index e19441b5a8..957141243b 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt.h @@ -1254,10 +1254,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAisReadWriteFile( /** * Check if the HSA KMT Model is enabled - * + * * Arguments: * @enable (OUT) - true if the HSA KMT Model is enabled, false otherwise - * + * * Return: * HSAKMT_STATUS_ERROR - failed * HSAKMT_STATUS_SUCCESS - successfully complete @@ -1269,6 +1269,59 @@ hsaKmtModelEnabled( bool* enable // OUT ); + +/** + * Experimental APIs to abstract DRM calls to thunk +*/ +HSAKMT_STATUS +HSAKMTAPI +hsaKmtHandleImport( + const HsaExternalHandleDesc* ImportDesc, + HsaHandleImportResult* ImportResult, + HsaHandleImportFlags* Flags +); + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMemoryVaMap( + HsaMemoryObjectHandle Handle, + HSAuint64 offset, + HSAuint64 size, + HSAuint64 addr, + HsaMemoryMapFlags flags +); + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMemoryVaUnmap( + HsaMemoryObjectHandle Handle, + HSAuint64 offset, + HSAuint64 size, + HSAuint64 addr +); + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMemoryCpuMap( + HsaMemoryObjectHandle Handle, + void** out_cpu_ptr +); + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMemHandleFree( + HsaMemoryObjectHandle Handle +); + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtMemoryGetCpuAddr( + HsaAMDGPUDeviceHandle DeviceHandle, + HsaMemoryObjectHandle MemoryHandle, + HSAint32* fd, // OUT + HSAuint64* cpu_addr // OUT +); + #ifdef __cplusplus } //extern "C" #endif diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h index 0faff5ad69..f5c62817e0 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmttypes.h @@ -355,6 +355,7 @@ typedef struct _HsaNodeProperties HSAuint32 LuidLowPart; // 
Windows Locally Unique Identifier Low 4 bytes HSAuint32 LuidHighPart; // Windows Locally Unique Identifier High 4 bytes + HSAuint64 WallClockKHz; // Wall Clock Frequency in KHz } HsaNodeProperties; @@ -1053,7 +1054,7 @@ typedef enum _HSA_EVENTID_MEMORYFLAGS typedef struct _HsaAccessAttributeFailure { - unsigned int NotPresent : 1; // Page not present or supervisor privilege + unsigned int NotPresent : 1; // Page not present or supervisor privilege unsigned int ReadOnly : 1; // Write access to a read-only page unsigned int NoExecute : 1; // Execute access to a page marked NX unsigned int GpuAccess : 1; // Host access only @@ -1527,6 +1528,49 @@ typedef enum _HsaAisFlags { HSA_AIS_WRITE= 0x2 } HsaAisFlags; +/* memory object handle used for translating drm BO object*/ +typedef struct _HsaMemoryObjectHandle* HsaMemoryObjectHandle; + +/* Access Permissions for memory mapping */ +typedef enum _HsaMemoryMapFlags { + HSA_MEMORY_ACCESS_NONE = 0, + HSA_MEMORY_ACCESS_RO = 1, + HSA_MEMORY_ACCESS_WO = 2, + HSA_MEMORY_ACCESS_RW = 3 +} HsaMemoryMapFlags; + +/* Handle type for import */ +typedef enum _HsaExternalHandleType{ + HSA_EXTERNAL_HANDLE_GEM_FLINK_NAME = 0, + HSA_EXTERNAL_HANDLE_KMS = 1, + HSA_EXTERNAL_HANDLE_DMA_BUF = 2 +} HsaExternalHandleType; + +typedef struct _HsaExternalHandleDesc { + HsaAMDGPUDeviceHandle device_handle; // GPU device handle (used for import only) + HSAint32 fd; // dmabuf fd + HsaExternalHandleType type; // handle type + HSAuint32 metadata; // Used for IPC handles +} HsaExternalHandleDesc; + +typedef struct _HsaHandleImportResult { + HsaMemoryObjectHandle buf_handle; // Thunk buffer object handle + HSAuint64 alloc_size; // allocation size for import + HSAuint32 metadata; // Used for IPC handles +} HsaHandleImportResult; + +typedef struct _HsaMemoryExportResult { + HSAint32 fd; // dmabuf fd +} HsaMemoryExportResult; + +typedef struct _HsaHandleImportFlags { + struct { + unsigned int IPCHandle : 1; // Handle type is IPC + unsigned int SysMem : 1; // 
Memory type is System Memory + unsigned int UpdateMetadata : 1; // Update metadata with IPC handle + unsigned int Reserved : 29; + } ui32; +} HsaHandleImportFlags; #ifdef __cplusplus } //extern "C" diff --git a/projects/rocr-runtime/libhsakmt/src/libhsakmt.h b/projects/rocr-runtime/libhsakmt/src/libhsakmt.h index 9581f7bdd4..7440d55e02 100644 --- a/projects/rocr-runtime/libhsakmt/src/libhsakmt.h +++ b/projects/rocr-runtime/libhsakmt/src/libhsakmt.h @@ -209,6 +209,7 @@ void hsakmt_topology_setup_is_dgpu_param(HsaNodeProperties *props); bool hsakmt_topology_is_svm_needed(HSA_ENGINE_ID EngineId); HSAuint32 hsakmt_PageSizeFromFlags(unsigned int pageSizeFlags); +HSAuint64 MapDrmPerm(HsaMemoryMapFlags flags); void* hsakmt_allocate_exec_aligned_memory_gpu(HsaKFDContext *ctx, uint32_t size, uint32_t align, diff --git a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver index 65593e370c..1525660420 100644 --- a/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver +++ b/projects/rocr-runtime/libhsakmt/src/libhsakmt.ver @@ -91,6 +91,12 @@ hsaKmtPcSamplingStop; hsaKmtPcSamplingSupport; hsaKmtAisReadWriteFile; hsaKmtGetMemoryHandle; +hsaKmtHandleImport; +hsaKmtMemoryVaMap; +hsaKmtMemoryVaUnmap; +hsaKmtMemHandleFree; +hsaKmtMemoryGetCpuAddr; +hsaKmtMemoryCpuMap; local: *; }; diff --git a/projects/rocr-runtime/libhsakmt/src/memory.c b/projects/rocr-runtime/libhsakmt/src/memory.c index 6e717b7e3b..07cfb70fb5 100644 --- a/projects/rocr-runtime/libhsakmt/src/memory.c +++ b/projects/rocr-runtime/libhsakmt/src/memory.c @@ -32,6 +32,11 @@ #include #include #include + +#include +#include +#include + #include "fmm.h" HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicyCtx(HsaKFDContext *ctx, @@ -936,4 +941,163 @@ hsaKmtGetMemoryHandle(void* va, void* MemoryAddress, HSAuint64 SizeInBytes, CHECK_KFD_OPEN(); return HSAKMT_STATUS_NOT_SUPPORTED; +} + +HSAKMT_STATUS HSAKMTAPI hsaKmtHandleImport(const HsaExternalHandleDesc* import_desc, + 
HsaHandleImportResult* import_res, HsaHandleImportFlags* flags)
+{
+    /*
+     * Import an external buffer (GEM flink name, KMS handle or dma-buf fd)
+     * through libdrm and return it as an opaque HsaMemoryObjectHandle.
+     *
+     * When flags->ui32.IPCHandle is set, the first UMD metadata word of the
+     * buffer is treated as an IPC token:
+     *   - UpdateMetadata set:   report the token already stored on the
+     *                           buffer, or store import_desc->metadata when
+     *                           the buffer has none;
+     *   - UpdateMetadata clear: for non-system memory the stored token must
+     *                           equal import_desc->metadata.
+     *
+     * Returns:
+     *   HSAKMT_STATUS_SUCCESS           - import_res is filled in
+     *   HSAKMT_STATUS_INVALID_PARAMETER - NULL argument or token mismatch
+     *                                     (import_res->metadata then holds
+     *                                     the token found on the buffer)
+     *   HSAKMT_STATUS_INVALID_HANDLE    - buffer info could not be queried
+     *   HSAKMT_STATUS_ERROR             - libdrm import failed
+     * On every failure the imported buffer object is released again and
+     * import_res->buf_handle is NULL.
+     */
+    CHECK_KFD_OPEN();
+    if (!import_desc || !import_res || !flags) {
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    /* Give every output a defined value, including on the error paths. */
+    import_res->buf_handle = NULL;
+    import_res->alloc_size = 0;
+    import_res->metadata = 0;
+
+    amdgpu_device_handle devhandle = (amdgpu_device_handle)import_desc->device_handle;
+    enum amdgpu_bo_handle_type type;
+    switch (import_desc->type) {
+        case HSA_EXTERNAL_HANDLE_GEM_FLINK_NAME:
+            type = amdgpu_bo_handle_type_gem_flink_name;
+            break;
+        case HSA_EXTERNAL_HANDLE_KMS:
+            type = amdgpu_bo_handle_type_kms;
+            break;
+        case HSA_EXTERNAL_HANDLE_DMA_BUF:
+        default:
+            type = amdgpu_bo_handle_type_dma_buf_fd;
+            break;
+    }
+    struct amdgpu_bo_import_result res;
+    int ret = amdgpu_bo_import(devhandle, type, import_desc->fd, &res);
+    if (ret) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    if (flags->ui32.IPCHandle) {
+        //query buffer object for pre existing metadata
+        struct amdgpu_bo_info info = {0};
+        ret = amdgpu_bo_query_info(res.buf_handle, &info);
+        if (ret) {
+            /* The import succeeded, so drop the reference we now hold. */
+            amdgpu_bo_free(res.buf_handle);
+            return HSAKMT_STATUS_INVALID_HANDLE;
+        }
+        uint32_t metadata = info.metadata.umd_metadata[0];
+        uint32_t size_metadata = info.metadata.size_metadata;
+        if (flags->ui32.UpdateMetadata) {
+            if (!!size_metadata) { // return pre-existing metadata
+                import_res->metadata = (HSAuint32)metadata;
+            } else {
+                struct amdgpu_bo_metadata buf_info = {0};
+                buf_info.size_metadata = sizeof(HSAuint32);
+                buf_info.umd_metadata[0] = (uint32_t)import_desc->metadata;
+                /* A failure to store the token is not treated as an import failure. */
+                amdgpu_bo_set_metadata(res.buf_handle, &buf_info);
+                /* Tell the caller which token is now attached to the buffer. */
+                import_res->metadata = import_desc->metadata;
+            }
+        } else if (!flags->ui32.SysMem && import_desc->metadata != metadata) {
+            /* Token validation applies to device memory only. */
+            import_res->metadata = (HSAuint32)metadata;
+            amdgpu_bo_free(res.buf_handle);
+            return HSAKMT_STATUS_INVALID_PARAMETER;
+        }
+    }
+
+    import_res->buf_handle = (HsaMemoryObjectHandle)res.buf_handle;
+    import_res->alloc_size = (HSAuint64)res.alloc_size;
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Translate thunk access flags into the AMDGPU_VM_PAGE_* bits passed to amdgpu_bo_va_op(). */
+HSAuint64 MapDrmPerm(HsaMemoryMapFlags flags) {
+    switch (flags) {
+        case HSA_MEMORY_ACCESS_RO:
+            return AMDGPU_VM_PAGE_READABLE;
+        case HSA_MEMORY_ACCESS_WO:
+            return AMDGPU_VM_PAGE_WRITEABLE;
+        case HSA_MEMORY_ACCESS_RW:
+            return AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+        case HSA_MEMORY_ACCESS_NONE:
+        default:
+            return 0;
+    }
+}
+
+/* Map [offset, offset + size) of the buffer object at GPU virtual address addr. */
+HSAKMT_STATUS HSAKMTAPI hsaKmtMemoryVaMap(HsaMemoryObjectHandle Handle,
+        HSAuint64 offset, HSAuint64 size, HSAuint64 addr,
+        HsaMemoryMapFlags flags)
+{
+    CHECK_KFD_OPEN();
+    amdgpu_bo_handle drmhandle = (amdgpu_bo_handle)(Handle);
+    if (!drmhandle) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    int ret = amdgpu_bo_va_op(drmhandle, offset, size, addr,
+            MapDrmPerm(flags), AMDGPU_VA_OP_MAP);
+    if (ret) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Remove the GPU virtual address mapping created by hsaKmtMemoryVaMap(). */
+HSAKMT_STATUS HSAKMTAPI hsaKmtMemoryVaUnmap(HsaMemoryObjectHandle Handle,
+        HSAuint64 offset, HSAuint64 size, HSAuint64 addr)
+{
+    CHECK_KFD_OPEN();
+    amdgpu_bo_handle drmhandle = (amdgpu_bo_handle)(Handle);
+    if (!drmhandle) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    int ret = amdgpu_bo_va_op(drmhandle, offset, size, addr, 0,
+            AMDGPU_VA_OP_UNMAP);
+    if (ret) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Clear the UMD metadata of a buffer object and release the handle.
+ * Returns HSAKMT_STATUS_ERROR for a NULL handle or when either libdrm call
+ * fails; the handle is released even if the metadata reset fails.
+ *
+ * NOTE(review): the metadata reset runs on every release, including handles
+ * obtained by importing a buffer exported elsewhere - confirm this is intended.
+ */
+HSAKMT_STATUS HSAKMTAPI hsaKmtMemHandleFree(HsaMemoryObjectHandle Handle)
+{
+    CHECK_KFD_OPEN();
+    amdgpu_bo_handle drmhandle = (amdgpu_bo_handle)Handle;
+    if (!drmhandle) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    // Reset metadata for the handle
+    struct amdgpu_bo_metadata zero_metadata = {0};
+    int meta_ret = amdgpu_bo_set_metadata(drmhandle, &zero_metadata);
+    /* Free unconditionally so a failed reset does not leak the buffer object. */
+    int free_ret = amdgpu_bo_free(drmhandle);
+    if (meta_ret || free_ret) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Map the buffer object into the CPU address space; *out_cpu_ptr receives the address. */
+HSAKMT_STATUS HSAKMTAPI hsaKmtMemoryCpuMap(HsaMemoryObjectHandle Handle,
+        void** out_cpu_ptr)
+{
+    CHECK_KFD_OPEN();
+    amdgpu_bo_handle drmhandle = (amdgpu_bo_handle)Handle;
+    if (!drmhandle || !out_cpu_ptr) {
+        return HSAKMT_STATUS_ERROR;
+    }
+
+    int ret = amdgpu_bo_cpu_map(drmhandle, out_cpu_ptr);
+    if (ret) {
+        return HSAKMT_STATUS_ERROR;
+    }
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS HSAKMTAPI hsaKmtMemoryGetCpuAddr(HsaAMDGPUDeviceHandle DeviceHandle,
+        HsaMemoryObjectHandle MemoryHandle, HSAint32* fd, HSAuint64* cpu_addr)
+{
+    amdgpu_device_handle devhandle = (amdgpu_device_handle)DeviceHandle;
+    int renderFd =
amdgpu_device_get_fd(devhandle); + if (renderFd < 0) return HSAKMT_STATUS_ERROR; + + uint32_t gem_handle = 0; + int ret = amdgpu_bo_export((amdgpu_bo_handle)MemoryHandle, amdgpu_bo_handle_type_kms, &gem_handle); + if (ret) { + return HSAKMT_STATUS_ERROR; + } + + union drm_amdgpu_gem_mmap args; + memset(&args, 0, sizeof(args)); + /* Query the buffer address (args.addr_ptr). + * The kernel driver ignores the offset and size parameters. */ + args.in.handle = gem_handle; + ret = drmCommandWriteRead(renderFd, DRM_AMDGPU_GEM_MMAP, &args, sizeof(args)); + if (ret) { + return HSAKMT_STATUS_ERROR; + } + *fd = (HSAint32)renderFd; + *cpu_addr = (HSAuint64)args.out.addr_ptr; + return HSAKMT_STATUS_SUCCESS; } \ No newline at end of file diff --git a/projects/rocr-runtime/libhsakmt/src/topology.c b/projects/rocr-runtime/libhsakmt/src/topology.c index 8786000e79..b33c8590a4 100644 --- a/projects/rocr-runtime/libhsakmt/src/topology.c +++ b/projects/rocr-runtime/libhsakmt/src/topology.c @@ -1068,6 +1068,7 @@ static int topology_get_node_props_from_drm(HsaNodeProperties *props) props->FamilyID = gpu_info.family_id; props->Integrated = !!(gpu_info.ids_flags & AMDGPU_IDS_FLAGS_FUSION); + props->WallClockKHz = gpu_info.gpu_counter_freq; err_query_gpu_info: amdgpu_device_deinitialize(device_handle); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index 915cbcf40e..855987dbe6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -90,24 +90,24 @@ namespace AMD { #if defined(__linux__) static_assert( - (sizeof(core::ShareableHandle::handle) >= sizeof(amdgpu_bo_handle)) && - (alignof(core::ShareableHandle::handle) >= alignof(amdgpu_bo_handle)), - "ShareableHandle cannot store a amdgpu_bo_handle"); + (sizeof(core::ShareableHandle::handle) >= 
sizeof(HsaMemoryObjectHandle)) && + (alignof(core::ShareableHandle::handle) >= alignof(HsaMemoryObjectHandle)), + "ShareableHandle cannot store a HsaMemoryObjectHandle"); #endif namespace { -__forceinline uint64_t drm_perm(hsa_access_permission_t perm) { +__forceinline HsaMemoryMapFlags mem_perm(hsa_access_permission_t perm) { switch (perm) { case HSA_ACCESS_PERMISSION_RO: - return AMDGPU_VM_PAGE_READABLE; + return HSA_MEMORY_ACCESS_RO; case HSA_ACCESS_PERMISSION_WO: - return AMDGPU_VM_PAGE_WRITEABLE; + return HSA_MEMORY_ACCESS_WO; case HSA_ACCESS_PERMISSION_RW: - return AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; + return HSA_MEMORY_ACCESS_RW; case HSA_ACCESS_PERMISSION_NONE: default: - return 0; + return HSA_MEMORY_ACCESS_NONE; } } @@ -475,52 +475,70 @@ hsa_status_t KfdDriver::ExportDMABuf(void *mem, size_t size, int *dmabuf_fd, hsa_status_t KfdDriver::ImportDMABuf(int dmabuf_fd, core::Agent &agent, core::ShareableHandle &handle) { - auto& gpu_agent = static_cast(agent); - amdgpu_bo_import_result res; - auto ret = DRM_CALL( - amdgpu_bo_import(gpu_agent.libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res)); - if (ret) return HSA_STATUS_ERROR; - +#if defined(__linux__) + auto &gpu_agent = static_cast(agent); + HsaExternalHandleDesc desc; + desc.device_handle = gpu_agent.libThunkDev(); + desc.fd = reinterpret_cast(dmabuf_fd); + desc.type = HSA_EXTERNAL_HANDLE_DMA_BUF; + desc.metadata = 0; + HsaHandleImportFlags hflags = {0}; + HsaHandleImportResult res; + HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtHandleImport(&desc, &res, &hflags)); + if (status != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR; + } handle.handle = reinterpret_cast(res.buf_handle); +#else + assert(!"Unimplemented!"); +#endif return HSA_STATUS_SUCCESS; } hsa_status_t KfdDriver::Map(core::ShareableHandle handle, void *mem, size_t offset, size_t size, hsa_access_permission_t perms) { - const auto ldrm_bo = reinterpret_cast(handle.handle); - if (!ldrm_bo) - return HSA_STATUS_ERROR; 
-
-  if (DRM_CALL(amdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast(mem),
-                               drm_perm(perms), AMDGPU_VA_OP_MAP)) != 0)
+#if defined(__linux__)
+  // Hand the mapping request to the thunk; it rejects a null handle itself.
+  HsaMemoryObjectHandle memhandle = reinterpret_cast<HsaMemoryObjectHandle>(handle.handle);
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemoryVaMap(memhandle, static_cast<HSAuint64>(offset),
+                  static_cast<HSAuint64>(size), reinterpret_cast<HSAuint64>(mem),
+                  mem_perm(perms)));
+  if (status != HSAKMT_STATUS_SUCCESS) {
     return HSA_STATUS_ERROR;
+  }
+#else
+  assert(!"Unimplemented!");
+#endif
   return HSA_STATUS_SUCCESS;
 }

 hsa_status_t KfdDriver::Unmap(core::ShareableHandle handle, void *mem,
                               size_t offset, size_t size) {
-  const auto ldrm_bo = reinterpret_cast(handle.handle);
-  if (!ldrm_bo)
-    return HSA_STATUS_ERROR;
-
-  if (DRM_CALL(amdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast(mem), 0,
-                               AMDGPU_VA_OP_UNMAP)) != 0)
+#if defined(__linux__)
+  HsaMemoryObjectHandle memhandle = reinterpret_cast<HsaMemoryObjectHandle>(handle.handle);
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemoryVaUnmap(memhandle, static_cast<HSAuint64>(offset),
+                  static_cast<HSAuint64>(size), reinterpret_cast<HSAuint64>(mem)));
+  if (status != HSAKMT_STATUS_SUCCESS) {
     return HSA_STATUS_ERROR;
+  }
+#else
+  assert(!"Unimplemented!");
+#endif
   return HSA_STATUS_SUCCESS;
 }

 hsa_status_t KfdDriver::ReleaseShareableHandle(core::ShareableHandle &handle) {
-  const auto ldrm_bo = reinterpret_cast(handle.handle);
-  if (!ldrm_bo)
+#if defined(__linux__)
+  // Release the thunk buffer object behind the handle, then reset the handle.
+  auto memhandle = reinterpret_cast<HsaMemoryObjectHandle>(handle.handle);
+  // A null handle is an error, as it was before the thunk API was introduced.
+  if (!memhandle) return HSA_STATUS_ERROR;
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemHandleFree(memhandle));
+  if (status != HSAKMT_STATUS_SUCCESS) {
     return HSA_STATUS_ERROR;
-
-  const auto ret = DRM_CALL(amdgpu_bo_free(ldrm_bo));
-  if (ret)
-    return HSA_STATUS_ERROR;
-
+  }
   handle = {};
+#else
+  assert(!"Unimplemented!");
+#endif
+  // Kept outside the #if so every build configuration returns a value.
   return HSA_STATUS_SUCCESS;
 }

 hsa_status_t KfdDriver::SPMAcquire(uint32_t preferred_node_id) const {
@@ -731,15 +749,12 @@ hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
 hsa_status_t KfdDriver::GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const {
assert(frequency); - amdgpu_gpu_info info; - amdgpu_device_handle handle; - if (GetDeviceHandle(node_id, reinterpret_cast(&handle)) != HSA_STATUS_SUCCESS) + HsaNodeProperties props; + if (GetNodeProperties(props, node_id) != HSA_STATUS_SUCCESS) { return HSA_STATUS_ERROR; + } - if (DRM_CALL(amdgpu_query_gpu_info(handle, &info)) < 0) return HSA_STATUS_ERROR; - - // Reported by libdrm in KHz. - *frequency = uint64_t(info.gpu_counter_freq) * 1000ull; + *frequency = uint64_t(props.WallClockKHz) * 1000ull; return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 1463bd9439..06bf55d346 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -434,6 +434,7 @@ class GpuAgent : public GpuAgentInt { // @brief returns the libdrm device handle __forceinline amdgpu_device_handle libDrmDev() const { return ldrm_dev_; } + __forceinline HsaAMDGPUDeviceHandle libThunkDev() const { return libthunk_dev_; } __forceinline void CheckClockTicks() { // If we did not update t1 since agent initialization, force a SyncClock. 
Otherwise computing @@ -831,6 +832,7 @@ class GpuAgent : public GpuAgentInt { // @brief device handle amdgpu_device_handle ldrm_dev_; + HsaAMDGPUDeviceHandle libthunk_dev_; DISALLOW_COPY_AND_ASSIGN(GpuAgent); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index 2f357abb64..df13c991f8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -540,7 +540,8 @@ class Runtime { size_requested(0), alloc_flags(core::MemoryRegion::AllocateNoFlags), user_ptr(nullptr), - ldrm_bo(NULL) {} + ldrm_bo(nullptr), + thunk_bo(nullptr) {} AllocationRegion(const MemoryRegion* region_arg, size_t size_arg, size_t size_requested, MemoryRegion::AllocateFlags alloc_flags) : region(region_arg), @@ -548,7 +549,8 @@ class Runtime { size_requested(size_requested), alloc_flags(alloc_flags), user_ptr(nullptr), - ldrm_bo(NULL) {} + ldrm_bo(nullptr), + thunk_bo(nullptr) {} struct notifier_t { void* ptr; @@ -563,6 +565,7 @@ class Runtime { void* user_ptr; std::unique_ptr> notifiers; amdgpu_bo_handle ldrm_bo; + HsaMemoryObjectHandle thunk_bo; }; struct AsyncEventsInfo; @@ -1012,7 +1015,8 @@ class Runtime { bool ipc_dmabuf_supported_; int IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle, unsigned int numNodes, HSAuint32 *nodes, - void **importAddress, HSAuint64 *importSize, bool isdmabufSysmem); + void **importAddress, HSAuint64 *importSize, + bool isdmabufSysmem, uint32_t shared_handle); }; } // namespace core diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h index 57b65e4e4c..481b67b9cb 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/thunk_loader.h @@ -336,6 +336,25 @@ class ThunkLoader { void* MemoryAddress, \ HSAuint64 
SizeInBytes, \ uint64_t* SharedMemoryHandle); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtHandleImport))(const HsaExternalHandleDesc* ImportDesc, \ + HsaHandleImportResult* ImportResult, \ + HsaHandleImportFlags* flags); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMemoryVaMap))(HsaMemoryObjectHandle Handle, \ + HSAuint64 offset, \ + HSAuint64 size, \ + HSAuint64 addr, \ + HsaMemoryMapFlags flags); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMemoryVaUnmap))(HsaMemoryObjectHandle Handle, \ + HSAuint64 offset, \ + HSAuint64 size, \ + HSAuint64 addr); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMemHandleFree))(HsaMemoryObjectHandle Handle); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMemoryGetCpuAddr))(HsaAMDGPUDeviceHandle DeviceHandle, \ + HsaMemoryObjectHandle MemoryHandle, \ + HSAint32* fd, \ + HSAuint64* cpu_addr); + typedef HSAKMT_STATUS (HSAKMT_DEF(hsaKmtMemoryCpuMap))(HsaMemoryObjectHandle Handle, \ + void** out_cpu_ptr); /* drm API */ typedef int (DRM_DEF(amdgpu_device_initialize))(int fd, \ uint32_t *major_version, \ @@ -484,6 +503,12 @@ class ThunkLoader { HSAKMT_DEF(hsaKmtQueueRingDoorbell)* HSAKMT_PFN(hsaKmtQueueRingDoorbell); HSAKMT_DEF(hsaKmtAisReadWriteFile)* HSAKMT_PFN(hsaKmtAisReadWriteFile); HSAKMT_DEF(hsaKmtGetMemoryHandle)* HSAKMT_PFN(hsaKmtGetMemoryHandle); + HSAKMT_DEF(hsaKmtHandleImport)* HSAKMT_PFN(hsaKmtHandleImport); + HSAKMT_DEF(hsaKmtMemoryVaMap)* HSAKMT_PFN(hsaKmtMemoryVaMap); + HSAKMT_DEF(hsaKmtMemoryVaUnmap)* HSAKMT_PFN(hsaKmtMemoryVaUnmap); + HSAKMT_DEF(hsaKmtMemHandleFree)* HSAKMT_PFN(hsaKmtMemHandleFree); + HSAKMT_DEF(hsaKmtMemoryGetCpuAddr)* HSAKMT_PFN(hsaKmtMemoryGetCpuAddr); + HSAKMT_DEF(hsaKmtMemoryCpuMap)* HSAKMT_PFN(hsaKmtMemoryCpuMap); DRM_DEF(amdgpu_device_initialize)* DRM_PFN(amdgpu_device_initialize); DRM_DEF(amdgpu_device_deinitialize)* DRM_PFN(amdgpu_device_deinitialize); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp 
index 39b73292d6..21590eb11d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -611,6 +611,7 @@ void GpuAgent::InitLibDrm() { "Agent creation failed.\nlibdrm get device handle failed.\n"); ldrm_dev_ = (amdgpu_device_handle)device_handle; + libthunk_dev_ = device_handle; } hsa_status_t GpuAgent::IterateRegion( diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 4fbf5aa145..a3811c25a6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -377,16 +377,14 @@ hsa_status_t Runtime::FreeMemory(void* ptr) { //track the exporter BO to clear meta data via set_metadata //clear the set metadata here if possible if theres an existing ldrm_bo - if (it->second.ldrm_bo) { + if (it->second.thunk_bo) { #if defined(__linux__) if (!thunkLoader()->IsDXG()) { - struct amdgpu_bo_info info = {0}; - auto err = DRM_CALL(amdgpu_bo_query_info(it->second.ldrm_bo, &info)); - //clear metadata - amdgpu_bo_metadata zero_metadata = {0}; - memset(zero_metadata.umd_metadata, 0, sizeof(uint32_t)); - DRM_CALL(amdgpu_bo_set_metadata(it->second.ldrm_bo, &zero_metadata)); + HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemHandleFree(it->second.thunk_bo)); + if (status != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR; + } } #else assert(!"Unimplemented!"); @@ -1409,30 +1407,27 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han #if defined(__linux__) if (!thunkLoader()->IsDXG()) { AMD::GpuAgent* agent_ = reinterpret_cast(agent); - amdgpu_bo_import_result res; srand(static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count())); handle->handle[7] = rand(); - //libdrm import for buffer object handle - if 
(DRM_CALL(amdgpu_bo_import(agent_->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, &res))) {
-        fprintf(stderr, "Error in amdgpu_bo_import\n");
-        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+      // Import the dma-buf through the thunk and attach (or fetch) the IPC token.
+      HsaExternalHandleDesc desc = {};
+      desc.device_handle = agent_->libThunkDev();
+      desc.fd = static_cast<HSAint32>(dmabuf_fd);
+      desc.type = HSA_EXTERNAL_HANDLE_DMA_BUF;
+      // The thunk stores desc.metadata on the buffer when it has no token yet.
+      desc.metadata = handle->handle[7];
+      HsaHandleImportFlags hflags = {};
+      hflags.ui32.IPCHandle = 1;
+      // SysMem is a 1-bit field: reduce the word to 0/1 instead of truncating it.
+      hflags.ui32.SysMem = (handle->handle[3] != 0);
+      hflags.ui32.UpdateMetadata = 1;
+      HsaHandleImportResult res = {};
+      // Seed with our token so handle[7] stays valid if the thunk leaves the field untouched.
+      res.metadata = desc.metadata;
+      HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtHandleImport(&desc, &res, &hflags));
+      // Any non-success status means res.buf_handle is not usable.
+      if (status != HSAKMT_STATUS_SUCCESS) {
+        close(dmabuf_fd);
+        return HSA_STATUS_ERROR;
       }
-
-      //query buffer object for pre existing metadata
-      struct amdgpu_bo_info info = {0};
-      if (!DRM_CALL(amdgpu_bo_query_info(res.buf_handle, &info)) && !!info.metadata.size_metadata) {
-        handle->handle[7] = info.metadata.umd_metadata[0];
-      } else {
-        amdgpu_bo_metadata buf_info = {0};
-        buf_info.size_metadata = sizeof(uint32_t);
-        buf_info.umd_metadata[0] = handle->handle[7];
-
-        DRM_CALL(amdgpu_bo_set_metadata(res.buf_handle, &buf_info));
-        allocation_map_[ptr].ldrm_bo = res.buf_handle;
-      }
-    }
+      handle->handle[7] = res.metadata;
+      allocation_map_[ptr].thunk_bo = res.buf_handle;
+    }
 #else
     assert(!"Unimplemented!");
 #endif
@@ -1485,7 +1480,8 @@ hsa_status_t Runtime::IPCCreate(void* ptr, size_t len, hsa_amd_ipc_memory_t* han
 int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
                              unsigned int numNodes, HSAuint32 *nodes,
-                             void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem) {
+                             void **importAddress, HSAuint64 *importSize, bool isDmabufSysmem,
+                             uint32_t shared_handle) {
   int dmabuf_fd = -1, socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);
   assert(socket_fd > -1 && "DMA buffer could not be imported for IPC!");
   if (socket_fd == -1) return -1;
@@ -1532,7 +1528,6 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
   dmabuf_fd =
ReceiveDmaBufFd(socket_fd);
   if (dmabuf_fd == -1) return -1;
-  amdgpu_bo_import_result res = {0};
   HsaGraphicsResourceInfo info;
   HSA_REGISTER_MEM_FLAGS regFlags;
   regFlags.ui32.requiresVAddr = !isDmabufSysmem;
@@ -1545,15 +1540,31 @@ int Runtime::IPCClientImport(uint32_t conn_handle, uint64_t dmabuf_fd_handle,
     HSAKMT_CALL(hsaKmtDeregisterMemory(*importAddress));
     AMD::GpuAgent* agent = reinterpret_cast(agents_by_node_[info.NodeId][0]);
-    err = DRM_CALL(amdgpu_bo_import(agent->libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd,
-                                    dmabuf_fd, &res));
+
+    // Import the received dma-buf through the thunk; shared_handle is the IPC
+    // token the thunk compares against the one stored on the buffer.
+    HsaExternalHandleDesc desc = {};
+    desc.device_handle = agent->libThunkDev();
+    desc.fd = static_cast<HSAint32>(dmabuf_fd);
+    desc.type = HSA_EXTERNAL_HANDLE_DMA_BUF;
+    desc.metadata = static_cast<HSAuint32>(shared_handle);
+    HsaHandleImportFlags hflags = {};
+    hflags.ui32.IPCHandle = 1;
+    hflags.ui32.SysMem = isDmabufSysmem;
+    hflags.ui32.UpdateMetadata = 0;
+    // Zero-initialised so res.metadata is defined in the message below even
+    // when the thunk fails before writing it.
+    HsaHandleImportResult res = {};
+    HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtHandleImport(&desc, &res, &hflags));
+    if (status != HSAKMT_STATUS_SUCCESS) {
+      fprintf(stderr, "IPC Client Import: Invalid IPC handle! expected %u, got %u\n",
+              shared_handle, res.metadata);
+      close(dmabuf_fd);
+      return -1;
+    }
     // Store the buffer object handle in allocation map for later use
-    if (err == HSAKMT_STATUS_SUCCESS) {
+    {
+      // Import succeeded (failures returned above); the scope bounds the lock.
       std::lock_guard lock(memory_lock_);
       allocation_map_[*importAddress] = AllocationRegion(nullptr, *importSize, *importSize,
                                                          core::MemoryRegion::AllocateNoFlags);
-      allocation_map_[*importAddress].ldrm_bo = res.buf_handle;
+      allocation_map_[*importAddress].thunk_bo = res.buf_handle;
     }
     close(dmabuf_fd);
   }
@@ -1575,7 +1586,7 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
   bool isFragment = false;
   uint32_t fragOffset = 0;
-  auto fixFragment = [&](amdgpu_bo_handle ldrm_bo) {
+  auto fixFragment = [&](HsaMemoryObjectHandle thunk_bo) {
     if (isFragment) {
       importAddress = reinterpret_cast(importAddress) + fragOffset;
       len = Min(len, importSize - fragOffset);
@@ -1583,36 +1594,21 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len,
     std::lock_guard lock(memory_lock_);
     allocation_map_[importAddress] =
         AllocationRegion(nullptr, len, len, core::MemoryRegion::AllocateNoFlags);
-    allocation_map_[importAddress].ldrm_bo = ldrm_bo;
+    allocation_map_[importAddress].thunk_bo = thunk_bo;
   };
   auto importMemory = [&](unsigned int numNodes, HSAuint32 *nodes, bool isSysMem) {
     int ret = ipc_dmabuf_supported_ ?
IPCClientImport(importHandle.handle[2], dmaBufFDHandle, numNodes, - nodes, &importAddress, &importSize, isSysMem) : + nodes, &importAddress, &importSize, isSysMem, + importHandle.handle[7]) : HSAKMT_CALL(hsaKmtRegisterSharedHandle( reinterpret_cast(&importHandle), &importAddress, &importSize )); - if (ret) return HSA_STATUS_ERROR_INVALID_ARGUMENT; - if (ipc_dmabuf_supported_ && !isSysMem) { -#if defined(__linux__) - if (!thunkLoader()->IsDXG()) { - // use the bo from the allocation map - // Only check metadata for GPU memory - struct amdgpu_bo_info info = {0}; - int ret = DRM_CALL(amdgpu_bo_query_info(allocation_map_[importAddress].ldrm_bo, &info)); - - // Validate metadata for IPC handle - if (ret || info.metadata.umd_metadata[0] != importHandle.handle[7]) { - fprintf(stderr, "IPC Attach: Invalid IPC handle! %u and %u\n", importHandle.handle[7], info.metadata.umd_metadata[0]); - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } - } -#else - assert(!"Unimplemented!"); -#endif + if (ret) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; } return HSA_STATUS_SUCCESS; }; @@ -1664,22 +1660,24 @@ hsa_status_t Runtime::IPCAttach(const hsa_amd_ipc_memory_t* handle, size_t len, if (!isDmabufSysMem) return mapMemoryToNodes(0, NULL); // System memory DMA Buf import - auto errCleanup = [&](amdgpu_bo_handle bo) + auto errCleanup = [&](HsaMemoryObjectHandle bo) { - DRM_CALL(amdgpu_bo_free(bo)); // auto frees cpu map + HSAKMT_CALL(hsaKmtMemHandleFree(bo)); return HSA_STATUS_ERROR; }; // Create a shared cpu access pointer for user void *cpuPtr; - amdgpu_bo_handle bo = allocation_map_[importAddress].ldrm_bo; - int ret = DRM_CALL(amdgpu_bo_cpu_map(bo, &cpuPtr)); - if (ret) return errCleanup(bo); - - // Note VA ops will always override flags to allow read/write/exec permissions. 
- ret = DRM_CALL(amdgpu_bo_va_op(bo, 0, importSize, - reinterpret_cast(cpuPtr), 0, AMDGPU_VA_OP_MAP)); - if (ret) return errCleanup(bo); + HsaMemoryObjectHandle bo = allocation_map_[importAddress].thunk_bo; + HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemoryCpuMap(bo, &cpuPtr)); + if (status != HSAKMT_STATUS_SUCCESS) { + return errCleanup(bo); + } + status = HSAKMT_CALL(hsaKmtMemoryVaMap(bo, 0, reinterpret_cast(importSize), + reinterpret_cast(cpuPtr), HSA_MEMORY_ACCESS_NONE)); + if (status != HSAKMT_STATUS_SUCCESS) { + return errCleanup(bo); + } importAddress = cpuPtr; fixFragment(bo); *mapped_ptr = importAddress; @@ -1717,13 +1715,18 @@ hsa_status_t Runtime::IPCDetach(void* ptr) { if (it != allocation_map_.end()) { if (it->second.region != nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT; #if defined(__linux__) - if (it->second.ldrm_bo) { - if (DRM_CALL(amdgpu_bo_va_op(it->second.ldrm_bo, 0, it->second.size, - reinterpret_cast(ptr), 0, AMDGPU_VA_OP_UNMAP))) - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - if (DRM_CALL(amdgpu_bo_free(it->second.ldrm_bo))) // auto unmaps from cpu - return HSA_STATUS_ERROR_INVALID_ARGUMENT; - ldrmImportCleaned = true; + if (it->second.thunk_bo) { + HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemoryVaUnmap(it->second.thunk_bo, 0, + reinterpret_cast(it->second.size), + reinterpret_cast(ptr))); + if (status != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + status = HSAKMT_CALL(hsaKmtMemHandleFree(it->second.thunk_bo)); + if (status != HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + ldrmImportCleaned = true; } #else assert(!"Unimplemented!"); @@ -2445,7 +2448,7 @@ void Runtime::Unload() { // Close IPC socket server if (ipc_sock_server_conns_.size()) IPCClientImport(getpid(), IPC_SOCK_SERVER_CONN_CLOSE_HANDLE, - 0, NULL, NULL, NULL, false); + 0, nullptr, nullptr, nullptr, false, 0); svm_profile_.reset(nullptr); @@ -2587,28 +2590,13 @@ int 
fn_amdgpu_device_get_fd_nosupport(HsaAMDGPUDeviceHandle device_handle) { int Runtime::GetAmdgpuDeviceArgs(Agent *agent, ShareableHandle handle, int *drm_fd, uint64_t *cpu_addr) { -#if defined(__linux__) - int renderFd = fn_amdgpu_device_get_fd(static_cast(agent)->libDrmDev()); - if (renderFd < 0) return HSA_STATUS_ERROR; - - uint32_t gem_handle = 0; - if (DRM_CALL(amdgpu_bo_export(reinterpret_cast(handle.handle), - amdgpu_bo_handle_type_kms, &gem_handle))) + auto devhandle = static_cast(agent)->libThunkDev(); + auto memhandle = reinterpret_cast(handle.handle); + HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMemoryGetCpuAddr(devhandle, memhandle, + reinterpret_cast(drm_fd), reinterpret_cast(cpu_addr))); + if (status != HSAKMT_STATUS_SUCCESS) { return HSA_STATUS_ERROR; - - union drm_amdgpu_gem_mmap args; - memset(&args, 0, sizeof(args)); - /* Query the buffer address (args.addr_ptr). - * The kernel driver ignores the offset and size parameters. */ - args.in.handle = gem_handle; - if (DRM_CALL(drmCommandWriteRead(renderFd, DRM_AMDGPU_GEM_MMAP, &args, sizeof(args)))) - return HSA_STATUS_ERROR; - - *drm_fd = renderFd; - *cpu_addr = args.out.addr_ptr; -#else - assert(!"Unimplemented!"); -#endif + } return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp index 7598ca211a..95a7586a75 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/thunk_loader.cpp @@ -77,13 +77,13 @@ namespace core { } ThunkLoader::ThunkLoader() - : thunk_handle(NULL), + : thunk_handle(nullptr), library_name(whoami()), is_loaded_(false) { if (!library_name.empty()) { rocr::os::DlError(); // Clear any existing error messages thunk_handle = rocr::os::LoadLib(library_name.c_str()); - if (thunk_handle == NULL) { + if (thunk_handle == nullptr) { fprintf(stderr, "Cannot load %s, 
failed:%s\n", library_name.c_str(), rocr::os::DlError()); } else { debug_print("Load %s successully!\n", library_name.c_str()); @@ -94,7 +94,7 @@ namespace core { ThunkLoader::~ThunkLoader() { if (IsSharedLibraryLoaded() - && (thunk_handle != NULL)) { + && (thunk_handle != nullptr)) { if (!rocr::os::CloseLib(thunk_handle)) { fprintf(stderr, "Cannot unload %s, failed:%s\n", library_name.c_str(), rocr::os::DlError()); } else { @@ -109,310 +109,328 @@ namespace core { dlerror(); // Clear any existing error messages HSAKMT_PFN(hsaKmtOpenKFD) = (HSAKMT_DEF(hsaKmtOpenKFD)*)dlsym(thunk_handle, "hsaKmtOpenKFD"); - if (HSAKMT_PFN(hsaKmtOpenKFD) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtOpenKFD) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtCloseKFD) = (HSAKMT_DEF(hsaKmtCloseKFD)*)dlsym(thunk_handle, "hsaKmtCloseKFD"); - if (HSAKMT_PFN(hsaKmtCloseKFD) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtCloseKFD) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetVersion) = (HSAKMT_DEF(hsaKmtGetVersion)*)dlsym(thunk_handle, "hsaKmtGetVersion"); - if (HSAKMT_PFN(hsaKmtGetVersion) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetVersion) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAcquireSystemProperties) = (HSAKMT_DEF(hsaKmtAcquireSystemProperties)*)dlsym(thunk_handle, "hsaKmtAcquireSystemProperties"); - if (HSAKMT_PFN(hsaKmtAcquireSystemProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAcquireSystemProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtReleaseSystemProperties) = (HSAKMT_DEF(hsaKmtReleaseSystemProperties)*)dlsym(thunk_handle, "hsaKmtReleaseSystemProperties"); - if (HSAKMT_PFN(hsaKmtReleaseSystemProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtReleaseSystemProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetNodeProperties) = (HSAKMT_DEF(hsaKmtGetNodeProperties)*)dlsym(thunk_handle, "hsaKmtGetNodeProperties"); - if (HSAKMT_PFN(hsaKmtGetNodeProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetNodeProperties) == nullptr) goto ERROR; 
HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) = (HSAKMT_DEF(hsaKmtGetNodeMemoryProperties)*)dlsym(thunk_handle, "hsaKmtGetNodeMemoryProperties"); - if (HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetNodeMemoryProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetNodeCacheProperties) = (HSAKMT_DEF(hsaKmtGetNodeCacheProperties)*)dlsym(thunk_handle, "hsaKmtGetNodeCacheProperties"); - if (HSAKMT_PFN(hsaKmtGetNodeCacheProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetNodeCacheProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) = (HSAKMT_DEF(hsaKmtGetNodeIoLinkProperties)*)dlsym(thunk_handle, "hsaKmtGetNodeIoLinkProperties"); - if (HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetNodeIoLinkProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtCreateEvent) = (HSAKMT_DEF(hsaKmtCreateEvent)*)dlsym(thunk_handle, "hsaKmtCreateEvent"); - if (HSAKMT_PFN(hsaKmtCreateEvent) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtCreateEvent) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDestroyEvent) = (HSAKMT_DEF(hsaKmtDestroyEvent)*)dlsym(thunk_handle, "hsaKmtDestroyEvent"); - if (HSAKMT_PFN(hsaKmtDestroyEvent) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDestroyEvent) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSetEvent) = (HSAKMT_DEF(hsaKmtSetEvent)*)dlsym(thunk_handle, "hsaKmtSetEvent"); - if (HSAKMT_PFN(hsaKmtSetEvent) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetEvent) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtResetEvent) = (HSAKMT_DEF(hsaKmtResetEvent)*)dlsym(thunk_handle, "hsaKmtResetEvent"); - if (HSAKMT_PFN(hsaKmtResetEvent) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtResetEvent) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtQueryEventState) = (HSAKMT_DEF(hsaKmtQueryEventState)*)dlsym(thunk_handle, "hsaKmtQueryEventState"); - if (HSAKMT_PFN(hsaKmtQueryEventState) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtQueryEventState) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtWaitOnEvent) = 
(HSAKMT_DEF(hsaKmtWaitOnEvent)*)dlsym(thunk_handle, "hsaKmtWaitOnEvent"); - if (HSAKMT_PFN(hsaKmtWaitOnEvent) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtWaitOnEvent) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents)*)dlsym(thunk_handle, "hsaKmtWaitOnMultipleEvents"); - if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtCreateQueue) = (HSAKMT_DEF(hsaKmtCreateQueue)*)dlsym(thunk_handle, "hsaKmtCreateQueue"); - if (HSAKMT_PFN(hsaKmtCreateQueue) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtCreateQueue) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtCreateQueueExt) = (HSAKMT_DEF(hsaKmtCreateQueueExt)*)dlsym(thunk_handle, "hsaKmtCreateQueueExt"); - if (HSAKMT_PFN(hsaKmtCreateQueueExt) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtCreateQueueExt) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtUpdateQueue) = (HSAKMT_DEF(hsaKmtUpdateQueue)*)dlsym(thunk_handle, "hsaKmtUpdateQueue"); - if (HSAKMT_PFN(hsaKmtUpdateQueue) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtUpdateQueue) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDestroyQueue) = (HSAKMT_DEF(hsaKmtDestroyQueue)*)dlsym(thunk_handle, "hsaKmtDestroyQueue"); - if (HSAKMT_PFN(hsaKmtDestroyQueue) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDestroyQueue) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSetQueueCUMask) = (HSAKMT_DEF(hsaKmtSetQueueCUMask)*)dlsym(thunk_handle, "hsaKmtSetQueueCUMask"); - if (HSAKMT_PFN(hsaKmtSetQueueCUMask) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetQueueCUMask) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSetMemoryPolicy) = (HSAKMT_DEF(hsaKmtSetMemoryPolicy)*)dlsym(thunk_handle, "hsaKmtSetMemoryPolicy"); - if (HSAKMT_PFN(hsaKmtSetMemoryPolicy) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetMemoryPolicy) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAllocMemory) = (HSAKMT_DEF(hsaKmtAllocMemory)*)dlsym(thunk_handle, "hsaKmtAllocMemory"); - if (HSAKMT_PFN(hsaKmtAllocMemory) == 
NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAllocMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAllocMemoryAlign) = (HSAKMT_DEF(hsaKmtAllocMemoryAlign)*)dlsym(thunk_handle, "hsaKmtAllocMemoryAlign"); - if (HSAKMT_PFN(hsaKmtAllocMemoryAlign) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAllocMemoryAlign) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtFreeMemory) = (HSAKMT_DEF(hsaKmtFreeMemory)*)dlsym(thunk_handle, "hsaKmtFreeMemory"); - if (HSAKMT_PFN(hsaKmtFreeMemory) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtFreeMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAvailableMemory) = (HSAKMT_DEF(hsaKmtAvailableMemory)*)dlsym(thunk_handle, "hsaKmtAvailableMemory"); - if (HSAKMT_PFN(hsaKmtAvailableMemory) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAvailableMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterMemory) = (HSAKMT_DEF(hsaKmtRegisterMemory)*)dlsym(thunk_handle, "hsaKmtRegisterMemory"); - if (HSAKMT_PFN(hsaKmtRegisterMemory) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) = (HSAKMT_DEF(hsaKmtRegisterMemoryToNodes)*)dlsym(thunk_handle, "hsaKmtRegisterMemoryToNodes"); - if (HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterMemoryToNodes) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) = (HSAKMT_DEF(hsaKmtRegisterMemoryWithFlags)*)dlsym(thunk_handle, "hsaKmtRegisterMemoryWithFlags"); - if (HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterMemoryWithFlags) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodes)*)dlsym(thunk_handle, "hsaKmtRegisterGraphicsHandleToNodes"); - if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodes) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) = 
(HSAKMT_DEF(hsaKmtRegisterGraphicsHandleToNodesExt)*)dlsym(thunk_handle, "hsaKmtRegisterGraphicsHandleToNodesExt"); - if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterGraphicsHandleToNodesExt) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtShareMemory) = (HSAKMT_DEF(hsaKmtShareMemory)*)dlsym(thunk_handle, "hsaKmtShareMemory"); - if (HSAKMT_PFN(hsaKmtShareMemory) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtShareMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterSharedHandle) = (HSAKMT_DEF(hsaKmtRegisterSharedHandle)*)dlsym(thunk_handle, "hsaKmtRegisterSharedHandle"); - if (HSAKMT_PFN(hsaKmtRegisterSharedHandle) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterSharedHandle) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) = (HSAKMT_DEF(hsaKmtRegisterSharedHandleToNodes)*)dlsym(thunk_handle, "hsaKmtRegisterSharedHandleToNodes"); - if (HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRegisterSharedHandleToNodes) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtProcessVMRead) = (HSAKMT_DEF(hsaKmtProcessVMRead)*)dlsym(thunk_handle, "hsaKmtProcessVMRead"); - if (HSAKMT_PFN(hsaKmtProcessVMRead) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtProcessVMRead) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtProcessVMWrite) = (HSAKMT_DEF(hsaKmtProcessVMWrite)*)dlsym(thunk_handle, "hsaKmtProcessVMWrite"); - if (HSAKMT_PFN(hsaKmtProcessVMWrite) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtProcessVMWrite) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDeregisterMemory) = (HSAKMT_DEF(hsaKmtDeregisterMemory)*)dlsym(thunk_handle, "hsaKmtDeregisterMemory"); - if (HSAKMT_PFN(hsaKmtDeregisterMemory) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDeregisterMemory) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtMapMemoryToGPU) = (HSAKMT_DEF(hsaKmtMapMemoryToGPU)*)dlsym(thunk_handle, "hsaKmtMapMemoryToGPU"); - if (HSAKMT_PFN(hsaKmtMapMemoryToGPU) == NULL) goto ERROR; + if 
(HSAKMT_PFN(hsaKmtMapMemoryToGPU) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) = (HSAKMT_DEF(hsaKmtMapMemoryToGPUNodes)*)dlsym(thunk_handle, "hsaKmtMapMemoryToGPUNodes"); - if (HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtMapMemoryToGPUNodes) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) = (HSAKMT_DEF(hsaKmtUnmapMemoryToGPU)*)dlsym(thunk_handle, "hsaKmtUnmapMemoryToGPU"); - if (HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtUnmapMemoryToGPU) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgRegister) = (HSAKMT_DEF(hsaKmtDbgRegister)*)dlsym(thunk_handle, "hsaKmtDbgRegister"); - if (HSAKMT_PFN(hsaKmtDbgRegister) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgRegister) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgUnregister) = (HSAKMT_DEF(hsaKmtDbgUnregister)*)dlsym(thunk_handle, "hsaKmtDbgUnregister"); - if (HSAKMT_PFN(hsaKmtDbgUnregister) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgUnregister) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgWavefrontControl) = (HSAKMT_DEF(hsaKmtDbgWavefrontControl)*)dlsym(thunk_handle, "hsaKmtDbgWavefrontControl"); - if (HSAKMT_PFN(hsaKmtDbgWavefrontControl) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgWavefrontControl) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgAddressWatch) = (HSAKMT_DEF(hsaKmtDbgAddressWatch)*)dlsym(thunk_handle, "hsaKmtDbgAddressWatch"); - if (HSAKMT_PFN(hsaKmtDbgAddressWatch) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgAddressWatch) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgEnable) = (HSAKMT_DEF(hsaKmtDbgEnable)*)dlsym(thunk_handle, "hsaKmtDbgEnable"); - if (HSAKMT_PFN(hsaKmtDbgEnable) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgEnable) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgDisable) = (HSAKMT_DEF(hsaKmtDbgDisable)*)dlsym(thunk_handle, "hsaKmtDbgDisable"); - if (HSAKMT_PFN(hsaKmtDbgDisable) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgDisable) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgGetDeviceData) = 
(HSAKMT_DEF(hsaKmtDbgGetDeviceData)*)dlsym(thunk_handle, "hsaKmtDbgGetDeviceData"); - if (HSAKMT_PFN(hsaKmtDbgGetDeviceData) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgGetDeviceData) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDbgGetQueueData) = (HSAKMT_DEF(hsaKmtDbgGetQueueData)*)dlsym(thunk_handle, "hsaKmtDbgGetQueueData"); - if (HSAKMT_PFN(hsaKmtDbgGetQueueData) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDbgGetQueueData) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetClockCounters) = (HSAKMT_DEF(hsaKmtGetClockCounters)*)dlsym(thunk_handle, "hsaKmtGetClockCounters"); - if (HSAKMT_PFN(hsaKmtGetClockCounters) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetClockCounters) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcGetCounterProperties) = (HSAKMT_DEF(hsaKmtPmcGetCounterProperties)*)dlsym(thunk_handle, "hsaKmtPmcGetCounterProperties"); - if (HSAKMT_PFN(hsaKmtPmcGetCounterProperties) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcGetCounterProperties) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcRegisterTrace) = (HSAKMT_DEF(hsaKmtPmcRegisterTrace)*)dlsym(thunk_handle, "hsaKmtPmcRegisterTrace"); - if (HSAKMT_PFN(hsaKmtPmcRegisterTrace) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcRegisterTrace) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcUnregisterTrace) = (HSAKMT_DEF(hsaKmtPmcUnregisterTrace)*)dlsym(thunk_handle, "hsaKmtPmcUnregisterTrace"); - if (HSAKMT_PFN(hsaKmtPmcUnregisterTrace) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcUnregisterTrace) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) = (HSAKMT_DEF(hsaKmtPmcAcquireTraceAccess)*)dlsym(thunk_handle, "hsaKmtPmcAcquireTraceAccess"); - if (HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcAcquireTraceAccess) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) = (HSAKMT_DEF(hsaKmtPmcReleaseTraceAccess)*)dlsym(thunk_handle, "hsaKmtPmcReleaseTraceAccess"); - if (HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) == NULL) goto ERROR; + if 
(HSAKMT_PFN(hsaKmtPmcReleaseTraceAccess) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcStartTrace) = (HSAKMT_DEF(hsaKmtPmcStartTrace)*)dlsym(thunk_handle, "hsaKmtPmcStartTrace"); - if (HSAKMT_PFN(hsaKmtPmcStartTrace) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcStartTrace) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcQueryTrace) = (HSAKMT_DEF(hsaKmtPmcQueryTrace)*)dlsym(thunk_handle, "hsaKmtPmcQueryTrace"); - if (HSAKMT_PFN(hsaKmtPmcQueryTrace) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcQueryTrace) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPmcStopTrace) = (HSAKMT_DEF(hsaKmtPmcStopTrace)*)dlsym(thunk_handle, "hsaKmtPmcStopTrace"); - if (HSAKMT_PFN(hsaKmtPmcStopTrace) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPmcStopTrace) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtMapGraphicHandle) = (HSAKMT_DEF(hsaKmtMapGraphicHandle)*)dlsym(thunk_handle, "hsaKmtMapGraphicHandle"); - if (HSAKMT_PFN(hsaKmtMapGraphicHandle) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtMapGraphicHandle) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtUnmapGraphicHandle) = (HSAKMT_DEF(hsaKmtUnmapGraphicHandle)*)dlsym(thunk_handle, "hsaKmtUnmapGraphicHandle"); - if (HSAKMT_PFN(hsaKmtUnmapGraphicHandle) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtUnmapGraphicHandle) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSetTrapHandler) = (HSAKMT_DEF(hsaKmtSetTrapHandler)*)dlsym(thunk_handle, "hsaKmtSetTrapHandler"); - if (HSAKMT_PFN(hsaKmtSetTrapHandler) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetTrapHandler) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetTileConfig) = (HSAKMT_DEF(hsaKmtGetTileConfig)*)dlsym(thunk_handle, "hsaKmtGetTileConfig"); - if (HSAKMT_PFN(hsaKmtGetTileConfig) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetTileConfig) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtQueryPointerInfo) = (HSAKMT_DEF(hsaKmtQueryPointerInfo)*)dlsym(thunk_handle, "hsaKmtQueryPointerInfo"); - if (HSAKMT_PFN(hsaKmtQueryPointerInfo) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtQueryPointerInfo) == nullptr) goto ERROR; 
HSAKMT_PFN(hsaKmtSetMemoryUserData) = (HSAKMT_DEF(hsaKmtSetMemoryUserData)*)dlsym(thunk_handle, "hsaKmtSetMemoryUserData"); - if (HSAKMT_PFN(hsaKmtSetMemoryUserData) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetMemoryUserData) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetQueueInfo) = (HSAKMT_DEF(hsaKmtGetQueueInfo)*)dlsym(thunk_handle, "hsaKmtGetQueueInfo"); - if (HSAKMT_PFN(hsaKmtGetQueueInfo) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetQueueInfo) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAllocQueueGWS) = (HSAKMT_DEF(hsaKmtAllocQueueGWS)*)dlsym(thunk_handle, "hsaKmtAllocQueueGWS"); - if (HSAKMT_PFN(hsaKmtAllocQueueGWS) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAllocQueueGWS) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRuntimeEnable) = (HSAKMT_DEF(hsaKmtRuntimeEnable)*)dlsym(thunk_handle, "hsaKmtRuntimeEnable"); - if (HSAKMT_PFN(hsaKmtRuntimeEnable) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRuntimeEnable) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtRuntimeDisable) = (HSAKMT_DEF(hsaKmtRuntimeDisable)*)dlsym(thunk_handle, "hsaKmtRuntimeDisable"); - if (HSAKMT_PFN(hsaKmtRuntimeDisable) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtRuntimeDisable) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) = (HSAKMT_DEF(hsaKmtCheckRuntimeDebugSupport)*)dlsym(thunk_handle, "hsaKmtCheckRuntimeDebugSupport"); - if (HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtCheckRuntimeDebugSupport) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) = (HSAKMT_DEF(hsaKmtGetRuntimeCapabilities)*)dlsym(thunk_handle, "hsaKmtGetRuntimeCapabilities"); - if (HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetRuntimeCapabilities) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtDebugTrapIoctl) = (HSAKMT_DEF(hsaKmtDebugTrapIoctl)*)dlsym(thunk_handle, "hsaKmtDebugTrapIoctl"); - if (HSAKMT_PFN(hsaKmtDebugTrapIoctl) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtDebugTrapIoctl) == nullptr) goto ERROR; 
HSAKMT_PFN(hsaKmtSPMAcquire) = (HSAKMT_DEF(hsaKmtSPMAcquire)*)dlsym(thunk_handle, "hsaKmtSPMAcquire"); - if (HSAKMT_PFN(hsaKmtSPMAcquire) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSPMAcquire) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSPMRelease) = (HSAKMT_DEF(hsaKmtSPMRelease)*)dlsym(thunk_handle, "hsaKmtSPMRelease"); - if (HSAKMT_PFN(hsaKmtSPMRelease) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSPMRelease) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSPMSetDestBuffer) = (HSAKMT_DEF(hsaKmtSPMSetDestBuffer)*)dlsym(thunk_handle, "hsaKmtSPMSetDestBuffer"); - if (HSAKMT_PFN(hsaKmtSPMSetDestBuffer) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSPMSetDestBuffer) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSVMSetAttr) = (HSAKMT_DEF(hsaKmtSVMSetAttr)*)dlsym(thunk_handle, "hsaKmtSVMSetAttr"); - if (HSAKMT_PFN(hsaKmtSVMSetAttr) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSVMSetAttr) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSVMGetAttr) = (HSAKMT_DEF(hsaKmtSVMGetAttr)*)dlsym(thunk_handle, "hsaKmtSVMGetAttr"); - if (HSAKMT_PFN(hsaKmtSVMGetAttr) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSVMGetAttr) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtSetXNACKMode) = (HSAKMT_DEF(hsaKmtSetXNACKMode)*)dlsym(thunk_handle, "hsaKmtSetXNACKMode"); - if (HSAKMT_PFN(hsaKmtSetXNACKMode) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtSetXNACKMode) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetXNACKMode) = (HSAKMT_DEF(hsaKmtGetXNACKMode)*)dlsym(thunk_handle, "hsaKmtGetXNACKMode"); - if (HSAKMT_PFN(hsaKmtGetXNACKMode) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetXNACKMode) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtOpenSMI) = (HSAKMT_DEF(hsaKmtOpenSMI)*)dlsym(thunk_handle, "hsaKmtOpenSMI"); - if (HSAKMT_PFN(hsaKmtOpenSMI) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtOpenSMI) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtExportDMABufHandle) = (HSAKMT_DEF(hsaKmtExportDMABufHandle)*)dlsym(thunk_handle, "hsaKmtExportDMABufHandle"); - if (HSAKMT_PFN(hsaKmtExportDMABufHandle) == NULL) goto ERROR; + if 
(HSAKMT_PFN(hsaKmtExportDMABufHandle) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) = (HSAKMT_DEF(hsaKmtWaitOnEvent_Ext)*)dlsym(thunk_handle, "hsaKmtWaitOnEvent_Ext"); - if (HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtWaitOnEvent_Ext) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) = (HSAKMT_DEF(hsaKmtWaitOnMultipleEvents_Ext)*)dlsym(thunk_handle, "hsaKmtWaitOnMultipleEvents_Ext"); - if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtWaitOnMultipleEvents_Ext) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReplaceAsanHeaderPage)*)dlsym(thunk_handle, "hsaKmtReplaceAsanHeaderPage"); - if (HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtReplaceAsanHeaderPage) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) = (HSAKMT_DEF(hsaKmtReturnAsanHeaderPage)*)dlsym(thunk_handle, "hsaKmtReturnAsanHeaderPage"); - if (HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtReturnAsanHeaderPage) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) = (HSAKMT_DEF(hsaKmtGetAMDGPUDeviceHandle)*)dlsym(thunk_handle, "hsaKmtGetAMDGPUDeviceHandle"); - if (HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetAMDGPUDeviceHandle) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) = (HSAKMT_DEF(hsaKmtPcSamplingQueryCapabilities)*)dlsym(thunk_handle, "hsaKmtPcSamplingQueryCapabilities"); - if (HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingQueryCapabilities) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingCreate) = (HSAKMT_DEF(hsaKmtPcSamplingCreate)*)dlsym(thunk_handle, "hsaKmtPcSamplingCreate"); - if (HSAKMT_PFN(hsaKmtPcSamplingCreate) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingCreate) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingDestroy) = 
(HSAKMT_DEF(hsaKmtPcSamplingDestroy)*)dlsym(thunk_handle, "hsaKmtPcSamplingDestroy"); - if (HSAKMT_PFN(hsaKmtPcSamplingDestroy) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingDestroy) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingStart) = (HSAKMT_DEF(hsaKmtPcSamplingStart)*)dlsym(thunk_handle, "hsaKmtPcSamplingStart"); - if (HSAKMT_PFN(hsaKmtPcSamplingStart) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingStart) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingStop) = (HSAKMT_DEF(hsaKmtPcSamplingStop)*)dlsym(thunk_handle, "hsaKmtPcSamplingStop"); - if (HSAKMT_PFN(hsaKmtPcSamplingStop) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingStop) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtPcSamplingSupport) = (HSAKMT_DEF(hsaKmtPcSamplingSupport)*)dlsym(thunk_handle, "hsaKmtPcSamplingSupport"); - if (HSAKMT_PFN(hsaKmtPcSamplingSupport) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtPcSamplingSupport) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)dlsym(thunk_handle, "hsaKmtModelEnabled"); - if (HSAKMT_PFN(hsaKmtModelEnabled) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtModelEnabled) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtQueueRingDoorbell) = (HSAKMT_DEF(hsaKmtQueueRingDoorbell)*)dlsym(thunk_handle, "hsaKmtQueueRingDoorbell"); - if (HSAKMT_PFN(hsaKmtQueueRingDoorbell) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtQueueRingDoorbell) == nullptr) goto ERROR; DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)dlsym(thunk_handle, "amdgpu_device_initialize"); - if (DRM_PFN(amdgpu_device_initialize) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_device_initialize) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)dlsym(thunk_handle, "hsaKmtAisReadWriteFile"); - if (HSAKMT_PFN(hsaKmtAisReadWriteFile) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtAisReadWriteFile) == nullptr) goto ERROR; HSAKMT_PFN(hsaKmtGetMemoryHandle) = 
(HSAKMT_DEF(hsaKmtGetMemoryHandle)*)dlsym(thunk_handle, "hsaKmtGetMemoryHandle"); - if (HSAKMT_PFN(hsaKmtGetMemoryHandle) == NULL) goto ERROR; + if (HSAKMT_PFN(hsaKmtGetMemoryHandle) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtHandleImport) = (HSAKMT_DEF(hsaKmtHandleImport)*)dlsym(thunk_handle, "hsaKmtHandleImport"); + if (HSAKMT_PFN(hsaKmtHandleImport) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtMemoryVaMap) = (HSAKMT_DEF(hsaKmtMemoryVaMap)*)dlsym(thunk_handle, "hsaKmtMemoryVaMap"); + if (HSAKMT_PFN(hsaKmtMemoryVaMap) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtMemoryVaUnmap) = (HSAKMT_DEF(hsaKmtMemoryVaUnmap)*)dlsym(thunk_handle, "hsaKmtMemoryVaUnmap"); + if (HSAKMT_PFN(hsaKmtMemoryVaUnmap) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtMemHandleFree) = (HSAKMT_DEF(hsaKmtMemHandleFree)*)dlsym(thunk_handle, "hsaKmtMemHandleFree"); + if (HSAKMT_PFN(hsaKmtMemHandleFree) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtMemoryGetCpuAddr) = (HSAKMT_DEF(hsaKmtMemoryGetCpuAddr)*)dlsym(thunk_handle, "hsaKmtMemoryGetCpuAddr"); + if (HSAKMT_PFN(hsaKmtMemoryGetCpuAddr) == nullptr) goto ERROR; + + HSAKMT_PFN(hsaKmtMemoryCpuMap) = (HSAKMT_DEF(hsaKmtMemoryCpuMap)*)dlsym(thunk_handle, "hsaKmtMemoryCpuMap"); + if (HSAKMT_PFN(hsaKmtMemoryCpuMap) == nullptr) goto ERROR; DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)dlsym(thunk_handle, "amdgpu_device_deinitialize"); - if (DRM_PFN(amdgpu_device_deinitialize) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_device_deinitialize) == nullptr) goto ERROR; DRM_PFN(amdgpu_query_gpu_info) = (DRM_DEF(amdgpu_query_gpu_info)*)dlsym(thunk_handle, "amdgpu_query_gpu_info"); - if (DRM_PFN(amdgpu_query_gpu_info) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_query_gpu_info) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_cpu_map) = (DRM_DEF(amdgpu_bo_cpu_map)*)dlsym(thunk_handle, "amdgpu_bo_cpu_map"); - if (DRM_PFN(amdgpu_bo_cpu_map) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_bo_cpu_map) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_free) = 
(DRM_DEF(amdgpu_bo_free)*)dlsym(thunk_handle, "amdgpu_bo_free"); - if (DRM_PFN(amdgpu_bo_free) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_bo_free) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_export) = (DRM_DEF(amdgpu_bo_export)*)dlsym(thunk_handle, "amdgpu_bo_export"); - if (DRM_PFN(amdgpu_bo_export) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_bo_export) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_import) = (DRM_DEF(amdgpu_bo_import)*)dlsym(thunk_handle, "amdgpu_bo_import"); - if (DRM_PFN(amdgpu_bo_import) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_bo_import) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_va_op) = (DRM_DEF(amdgpu_bo_va_op)*)dlsym(thunk_handle, "amdgpu_bo_va_op"); - if (DRM_PFN(amdgpu_bo_va_op) == NULL) goto ERROR; + if (DRM_PFN(amdgpu_bo_va_op) == nullptr) goto ERROR; DRM_PFN(amdgpu_bo_query_info) = (DRM_DEF(amdgpu_bo_query_info)*)dlsym(thunk_handle, "amdgpu_bo_query_info"); if (DRM_PFN(amdgpu_bo_query_info) == NULL) goto ERROR; @@ -421,7 +439,7 @@ namespace core { if (DRM_PFN(amdgpu_bo_set_metadata) == NULL) goto ERROR; DRM_PFN(drmCommandWriteRead) = (DRM_DEF(drmCommandWriteRead)*)dlsym(thunk_handle, "drmCommandWriteRead"); - if (DRM_PFN(drmCommandWriteRead) == NULL) goto ERROR; + if (DRM_PFN(drmCommandWriteRead) == nullptr) goto ERROR; debug_print("Load all DTIF APIs OK!\n"); return; @@ -525,6 +543,12 @@ ERROR: HSAKMT_PFN(hsaKmtModelEnabled) = (HSAKMT_DEF(hsaKmtModelEnabled)*)(&hsaKmtModelEnabled); HSAKMT_PFN(hsaKmtAisReadWriteFile) = (HSAKMT_DEF(hsaKmtAisReadWriteFile)*)(&hsaKmtAisReadWriteFile); HSAKMT_PFN(hsaKmtGetMemoryHandle) = (HSAKMT_DEF(hsaKmtGetMemoryHandle)*)(&hsaKmtGetMemoryHandle); + HSAKMT_PFN(hsaKmtHandleImport) = (HSAKMT_DEF(hsaKmtHandleImport)*)(&hsaKmtHandleImport); + HSAKMT_PFN(hsaKmtMemoryVaMap) = (HSAKMT_DEF(hsaKmtMemoryVaMap)*)(&hsaKmtMemoryVaMap); + HSAKMT_PFN(hsaKmtMemoryVaUnmap) = (HSAKMT_DEF(hsaKmtMemoryVaUnmap)*)(&hsaKmtMemoryVaUnmap); + HSAKMT_PFN(hsaKmtMemHandleFree) = (HSAKMT_DEF(hsaKmtMemHandleFree)*)(&hsaKmtMemHandleFree); + 
HSAKMT_PFN(hsaKmtMemoryGetCpuAddr) = (HSAKMT_DEF(hsaKmtMemoryGetCpuAddr)*)(&hsaKmtMemoryGetCpuAddr); + HSAKMT_PFN(hsaKmtMemoryCpuMap) = (HSAKMT_DEF(hsaKmtMemoryCpuMap)*)(&hsaKmtMemoryCpuMap); DRM_PFN(amdgpu_device_initialize) = (DRM_DEF(amdgpu_device_initialize)*)(&amdgpu_device_initialize); DRM_PFN(amdgpu_device_deinitialize) = (DRM_DEF(amdgpu_device_deinitialize)*)(&amdgpu_device_deinitialize); @@ -548,8 +572,8 @@ ERROR: DtifCreateFunc* pfnDtifCreate = (DtifCreateFunc*)rocr::os::GetExportAddress(thunk_handle, "DtifCreate"); - if (pfnDtifCreate != NULL) { - if (pfnDtifCreate("HSA") != NULL) { + if (pfnDtifCreate != nullptr) { + if (pfnDtifCreate("HSA") != nullptr) { debug_print("DtifCreate OK!\n"); return true; } else { @@ -564,12 +588,12 @@ ERROR: if (!IsDTIF()) return true; - if (thunk_handle == NULL) + if (thunk_handle == nullptr) return false; DtifDestroyFunc* pfnDtifDestroy = (DtifDestroyFunc*)rocr::os::GetExportAddress(thunk_handle, "DtifDestroy"); - if (pfnDtifDestroy != NULL) { + if (pfnDtifDestroy != nullptr) { pfnDtifDestroy(); debug_print("DtifDestroy OK!\n"); return true;