rocr/driver: add GetDeviceHandle to driver interface

This commit introduces a new GetDeviceHandle API to the driver
interface, allowing retrieval of the device handle for a
specific node.

- Implemented GetDeviceHandle in KfdDriver to fetch the AMD GPU
  device handle using hsaKmtGetAMDGPUDeviceHandle.
- Added a stub implementation of GetDeviceHandle in XdnaDriver
  that returns HSA_STATUS_ERROR.
- Modified GpuAgent::InitLibDrm to use driver().GetDeviceHandle
  instead of directly calling hsaKmtGetAMDGPUDeviceHandle.

Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>


[ROCm/ROCR-Runtime commit: 05b83e72d9]
Этот коммит содержится в:
Honglei Huang
2025-07-01 14:00:37 +08:00
коммит произвёл Huang, Honglei1
родитель d675a9e3a0
Коммит bacf61dde9
6 изменённых файлов: 26 добавлений и 4 удалений
+9
Просмотреть файл
@@ -574,6 +574,15 @@ hsa_status_t KfdDriver::SetTrapHandler(uint32_t node_id, const void* base, uint6
return HSA_STATUS_SUCCESS;
}
/// @brief Fetches the AMD GPU device handle of a node via hsaKmtGetAMDGPUDeviceHandle.
/// @param[in] node_id Node ID forwarded to the thunk call.
/// @param[out] device_handle Location that receives the handle. Must not be
/// null; this is only enforced by an assert.
/// @return HSA_STATUS_SUCCESS when the thunk call reports HSAKMT_STATUS_SUCCESS,
/// HSA_STATUS_ERROR for any other thunk status.
hsa_status_t KfdDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
  assert(device_handle);

  // The thunk entry point takes an HsaAMDGPUDeviceHandle*, so the generic
  // out-pointer is reinterpreted before the call.
  HsaAMDGPUDeviceHandle* const kmt_handle =
      reinterpret_cast<HsaAMDGPUDeviceHandle*>(device_handle);
  const HSAKMT_STATUS kmt_status =
      HSAKMT_CALL(hsaKmtGetAMDGPUDeviceHandle(node_id, kmt_handle));

  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
// AIE does not support streaming performance monitor.
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
+4
Просмотреть файл
@@ -877,5 +877,9 @@ hsa_status_t XdnaDriver::SetTrapHandler(uint32_t node_id, const void* base, uint
return HSA_STATUS_ERROR;
}
/// @brief Device-handle lookup stub for the XDNA driver.
/// @param[in] node_id Ignored.
/// @param[out] device_handle Ignored; never written.
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::GetDeviceHandle(uint32_t node_id, void** device_handle) const {
  // Neither argument is consulted by this stub.
  static_cast<void>(node_id);
  static_cast<void>(device_handle);
  return HSA_STATUS_ERROR;
}
} // namespace AMD
} // namespace rocr
+1
Просмотреть файл
@@ -124,6 +124,7 @@ public:
bool* is_spm_data_loss) const override;
hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
const void* buffer_base, uint64_t buffer_base_size) const override;
hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
+1
Просмотреть файл
@@ -238,6 +238,7 @@ public:
bool* is_spm_data_loss) const override;
hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
const void* buffer_base, uint64_t buffer_base_size) const override;
hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const override;
hsa_status_t IsModelEnabled(bool* enable) const override;
+7
Просмотреть файл
@@ -269,6 +269,13 @@ public:
virtual hsa_status_t SetTrapHandler(uint32_t node_id, const void* base, uint64_t base_size,
const void* buffer_base, uint64_t buffer_base_size) const = 0;
/// @brief Gets the device handle for a specific node.
/// @param[in] node_id Node ID of the agent
/// @param[out] device_handle Receives the driver-specific device handle on
/// success
/// @return HSA_STATUS_SUCCESS if the driver successfully returns the device
/// handle, an error status otherwise.
virtual hsa_status_t GetDeviceHandle(uint32_t node_id, void** device_handle) const = 0;
/// @brief Check if the HSA KMT Model is enabled
/// @param[out] enable True if the model is enabled, false otherwise
virtual hsa_status_t IsModelEnabled(bool* enable) const = 0;
+4 -4
Просмотреть файл
@@ -601,12 +601,12 @@ void GpuAgent::InitCacheList() {
}
void GpuAgent::InitLibDrm() {
HSAKMT_STATUS status;
hsa_status_t status;
HsaAMDGPUDeviceHandle device_handle;
status = HSAKMT_CALL(hsaKmtGetAMDGPUDeviceHandle(node_id(), &device_handle));
if (status != HSAKMT_STATUS_SUCCESS)
throw AMD::hsa_exception(HSA_STATUS_ERROR,
status = driver().GetDeviceHandle(node_id(), &device_handle);
if (status != HSA_STATUS_SUCCESS)
throw AMD::hsa_exception(status,
"Agent creation failed.\nlibdrm get device handle failed.\n");
ldrm_dev_ = (amdgpu_device_handle)device_handle;