diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
index 48506c60e9..8271cb5904 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
@@ -574,6 +574,20 @@ hsa_status_t KfdDriver::SetTrapHandler(uint32_t node_id, const void* base, uint6
   return HSA_STATUS_SUCCESS;
 }
 
+// Fetches the device handle of node_id through hsaKmtGetAMDGPUDeviceHandle and
+// stores it in *device_handle. Any non-success HSAKMT status is reported as
+// HSA_STATUS_ERROR.
+hsa_status_t KfdDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
+  assert(device_handle);
+
+  // hsaKmtGetAMDGPUDeviceHandle takes an HsaAMDGPUDeviceHandle*, hence the cast from void**.
+  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtGetAMDGPUDeviceHandle(
+      node_id, reinterpret_cast<HsaAMDGPUDeviceHandle*>(device_handle)));
+  if (status != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
+
+  return HSA_STATUS_SUCCESS;
+}
+
 hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
   // AIE does not support streaming performance monitor.
   HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
index cec114df16..52c50d92f4 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
@@ -877,5 +877,11 @@ hsa_status_t XdnaDriver::SetTrapHandler(uint32_t node_id, const void* base, uint
   return HSA_STATUS_ERROR;
 }
 
+// This driver hands out no device handle: the call always returns
+// HSA_STATUS_ERROR and never writes to device_handle.
+hsa_status_t XdnaDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
+  return HSA_STATUS_ERROR;
+}
+
 }  // namespace AMD
 }  // namespace rocr
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
index 8eac1e38ff..7932f373ff 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
@@ -124,6 +124,7 @@ public:
                                 bool* is_spm_data_loss) const override;
   hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                               const void* buffer_base, uint64_t buffer_base_size) const override;
+  hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
 
   hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
index 814bd0e1a3..42833e5aef 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h
@@ -238,6 +238,7 @@ public:
                                 bool* is_spm_data_loss) const override;
   hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                               const void* buffer_base, uint64_t buffer_base_size) const override;
+  hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
 
   hsa_status_t IsModelEnabled(bool* enable) const override;
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
index 1b578a54dd..21add45cb5 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
@@ -269,6 +269,13 @@ public:
   virtual hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
                                       const void* buffer_base, uint64_t buffer_base_size) const = 0;
 
+  /// @brief Gets the device handle for a specific node.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[out] device_handle Receives the device handle on success
+  /// @return HSA_STATUS_SUCCESS if the driver successfully returns the device handle
+  virtual hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const = 0;
+
+
   /// @brief Check if the HSA KMT Model is enabled
   /// @param[out] enable True if the model is enabled, false otherwise
   virtual hsa_status_t IsModelEnabled(bool* enable) const = 0;
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
index bee8664090..013fa119f5 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -601,12 +601,12 @@ void GpuAgent::InitCacheList() {
 }
 
 void GpuAgent::InitLibDrm() {
-  HSAKMT_STATUS status;
+  hsa_status_t status;
   HsaAMDGPUDeviceHandle device_handle;
 
-  status = HSAKMT_CALL(hsaKmtGetAMDGPUDeviceHandle(node_id(), &device_handle));
-  if (status != HSAKMT_STATUS_SUCCESS)
-    throw AMD::hsa_exception(HSA_STATUS_ERROR,
+  status = driver().GetDeviceHandle(node_id(), &device_handle);
+  if (status != HSA_STATUS_SUCCESS)
+    throw AMD::hsa_exception(status,
                              "Agent creation failed.\nlibdrm get device handle failed.\n");
 
   ldrm_dev_ = (amdgpu_device_handle)device_handle;