rocr/driver: add PC sampling support to driver interface
Add PC sampling functionality to the driver interface:

1. Add new PC sampling methods to the Driver base class:
   - PcSamplingQueryCapabilities
   - PcSamplingCreate
   - PcSamplingDestroy
   - PcSamplingStart
   - PcSamplingStop

2. Implement the PC sampling methods in KfdDriver using HSAKMT APIs:
   - Map HSAKMT status codes to HSA status codes
   - Handle resource-busy conditions
   - Handle errors properly for all operations

Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>
Этот коммит содержится в:
коммит произвёл
Huang, Honglei1
родитель
a47c060d6a
Коммит
56cb9390ff
@@ -730,5 +730,64 @@ hsa_status_t KfdDriver::ReturnAsanHeaderPage(void* mem) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @brief Translates the HSAKMT status of a PC sampling call into an HSA status.
///
/// All PC sampling entry points of KfdDriver share this mapping, so it lives in
/// one place instead of being repeated in every method.
///
/// @param[in] status Status returned by an hsaKmtPcSampling* call.
/// @return HSA_STATUS_ERROR_RESOURCE_BUSY for HSAKMT_STATUS_KERNEL_ALREADY_OPENED,
/// HSA_STATUS_SUCCESS for HSAKMT_STATUS_SUCCESS, HSA_STATUS_ERROR otherwise.
static hsa_status_t PcSamplingStatusToHsa(HSAKMT_STATUS status) {
  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
    // Cast kept from the original code: the busy code is not usable as a plain
    // hsa_status_t there (presumably it is declared in a vendor extension enum).
    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
  }
  if (status != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}

/// @brief Queries PC sampling capabilities of a node via hsaKmtPcSamplingQueryCapabilities.
/// @param[in] node_id Node ID of the agent.
/// @param[out] sample_info Buffer forwarded to HSAKMT for the sample information.
/// @param[in] sample_info_sz Size of @p sample_info, forwarded unchanged.
/// @param[out] sz_needed Receives the size HSAKMT reports as needed.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_RESOURCE_BUSY or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
                                                    uint32_t sample_info_sz,
                                                    uint32_t* sz_needed) const {
  const HSAKMT_STATUS status = HSAKMT_CALL(
      hsaKmtPcSamplingQueryCapabilities(node_id, sample_info, sample_info_sz, sz_needed));
  return PcSamplingStatusToHsa(status);
}

/// @brief Creates a PC sampling session via hsaKmtPcSamplingCreate.
/// @param[in] node_id Node ID of the agent.
/// @param[in] sample_info Sampling information forwarded to HSAKMT.
/// @param[out] trace_id Receives the trace ID of the new session.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_RESOURCE_BUSY or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
                                         uint32_t* trace_id) const {
  const HSAKMT_STATUS status =
      HSAKMT_CALL(hsaKmtPcSamplingCreate(node_id, sample_info, trace_id));
  return PcSamplingStatusToHsa(status);
}

/// @brief Destroys a PC sampling session via hsaKmtPcSamplingDestroy.
/// @param[in] node_id Node ID of the agent.
/// @param[in] trace_id Trace ID of the PC sampling session.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_RESOURCE_BUSY or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingDestroy(node_id, trace_id));
  return PcSamplingStatusToHsa(status);
}

/// @brief Starts a PC sampling session via hsaKmtPcSamplingStart.
/// @param[in] node_id Node ID of the agent.
/// @param[in] trace_id Trace ID of the PC sampling session.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_RESOURCE_BUSY or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingStart(node_id, trace_id));
  return PcSamplingStatusToHsa(status);
}

/// @brief Stops a PC sampling session via hsaKmtPcSamplingStop.
/// @param[in] node_id Node ID of the agent.
/// @param[in] trace_id Trace ID of the PC sampling session.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_RESOURCE_BUSY or HSA_STATUS_ERROR.
hsa_status_t KfdDriver::PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingStop(node_id, trace_id));
  return PcSamplingStatusToHsa(status);
}
|
||||
|
||||
} // namespace AMD
|
||||
} // namespace rocr
|
||||
|
||||
@@ -141,6 +141,14 @@ public:
|
||||
uint64_t* size) const override;
|
||||
hsa_status_t ReplaceAsanHeaderPage(void* mem) const override;
|
||||
hsa_status_t ReturnAsanHeaderPage(void* mem) const override;
|
||||
/// PC sampling operations; KfdDriver implements these on top of the hsaKmtPcSampling* calls.
hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
|
||||
uint32_t sample_info_sz,
|
||||
uint32_t* sz_needed) const override;
|
||||
hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
|
||||
uint32_t* trace_id) const override;
|
||||
hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const override;
|
||||
hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const override;
|
||||
hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const override;
|
||||
|
||||
hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
|
||||
|
||||
|
||||
@@ -386,6 +386,57 @@ public:
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// @brief Queries the PC sampling capabilities of a node.
|
||||
/// @param[in] node_id Node ID of the agent
|
||||
/// @param[out] sample_info Buffer for the sample information; the GPU agent passes NULL (with a size of 0) when it only wants sz_needed
|
||||
/// @param[in] sample_info_sz Capacity of sample_info; the GPU agent passes a count of HsaPcSamplingInfo entries
|
||||
/// @param[out] sz_needed Size of the sample information needed; the GPU agent uses it as an entry count
|
||||
/// @return HSA_STATUS_SUCCESS if the PC sampling capabilities were successfully queried, or an
|
||||
/// error code. This default implementation always returns HSA_STATUS_ERROR_INVALID_AGENT.
|
||||
virtual hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
|
||||
uint32_t sample_info_sz,
|
||||
uint32_t* sz_needed) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// @brief Creates a PC sampling session on a node.
|
||||
/// @param[in] node_id Node ID of the agent
|
||||
/// @param[in] sample_info Pointer to the sampling information describing the session to create
|
||||
/// @param[out] trace_id Pointer that receives the trace ID of the created session
|
||||
/// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully created, or an error
|
||||
/// code. This default implementation always returns HSA_STATUS_ERROR_INVALID_AGENT.
|
||||
virtual hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
|
||||
uint32_t* trace_id) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// @brief Destroys a PC sampling session on a node.
|
||||
/// @param[in] node_id Node ID of the agent
|
||||
/// @param[in] trace_id Trace ID of the PC sampling session to destroy
|
||||
/// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully destroyed, or an error
|
||||
/// code. This default implementation always returns HSA_STATUS_ERROR_INVALID_AGENT.
|
||||
virtual hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// @brief Starts a PC sampling session on a node.
|
||||
/// @param[in] node_id Node ID of the agent
|
||||
/// @param[in] trace_id Trace ID of the PC sampling session to start
|
||||
/// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully started, or an error
|
||||
/// code. This default implementation always returns HSA_STATUS_ERROR_INVALID_AGENT.
|
||||
virtual hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// @brief Stops a PC sampling session on a node.
|
||||
/// @param[in] node_id Node ID of the agent
|
||||
/// @param[in] trace_id Trace ID of the PC sampling session to stop
|
||||
/// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully stopped, or an error
|
||||
/// code. This default implementation always returns HSA_STATUS_ERROR_INVALID_AGENT.
|
||||
virtual hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
/// Unique identifier for supported kernel-mode drivers.
|
||||
const DriverType kernel_driver_type_;
|
||||
|
||||
|
||||
@@ -2553,14 +2553,14 @@ hsa_status_t GpuAgent::PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configura
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
// First query to get size of list needed
|
||||
HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtPcSamplingQueryCapabilities(node_id(), NULL, 0, &size));
|
||||
if (ret != HSAKMT_STATUS_SUCCESS || size == 0) return HSA_STATUS_ERROR;
|
||||
hsa_status_t ret = driver().PcSamplingQueryCapabilities(node_id(), NULL, 0, &size);
|
||||
if (ret != HSA_STATUS_SUCCESS || size == 0) return ret;
|
||||
|
||||
std::vector<HsaPcSamplingInfo> sampleInfoList(size);
|
||||
ret = HSAKMT_CALL(hsaKmtPcSamplingQueryCapabilities(node_id(), sampleInfoList.data(), sampleInfoList.size(),
|
||||
&size));
|
||||
ret = driver().PcSamplingQueryCapabilities(node_id(), sampleInfoList.data(),
|
||||
sampleInfoList.size(), &size);
|
||||
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
|
||||
if (ret != HSA_STATUS_SUCCESS) return ret;
|
||||
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
hsa_ven_amd_pcs_configuration_t hsaPcSampling;
|
||||
@@ -2586,10 +2586,9 @@ hsa_status_t GpuAgent::PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& sess
|
||||
|
||||
// Pass the sampling information to the kernel driver to create PC
|
||||
// sampling session.
|
||||
HSAKMT_STATUS retkmt = HSAKMT_CALL(hsaKmtPcSamplingCreate(node_id(), &sampleInfo, &thunkId));
|
||||
if (retkmt != HSAKMT_STATUS_SUCCESS) {
|
||||
return (retkmt == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) ? (hsa_status_t)HSA_STATUS_ERROR_RESOURCE_BUSY
|
||||
: HSA_STATUS_ERROR;
|
||||
ret = driver().PcSamplingCreate(node_id(), &sampleInfo, &thunkId);
|
||||
if (ret != HSA_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
debug_print("Created PC sampling session with thunkId:%d\n", thunkId);
|
||||
@@ -2795,7 +2794,7 @@ hsa_status_t GpuAgent::PcSamplingCreateFromId(HsaPcSamplingTraceId ioctlId,
|
||||
hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) {
|
||||
if (PcSamplingStop(session) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;
|
||||
|
||||
HSAKMT_STATUS retKmt = HSAKMT_CALL(hsaKmtPcSamplingDestroy(node_id(), session.ThunkId()));
|
||||
hsa_status_t ret = driver().PcSamplingDestroy(node_id(), session.ThunkId());
|
||||
hsa_ven_amd_pcs_method_kind_t sampling_method = session.method();
|
||||
|
||||
pcs_data_t* pcs_data = nullptr;
|
||||
@@ -2827,7 +2826,7 @@ hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& ses
|
||||
// Update the trap handler to clear any associated device data
|
||||
UpdateTrapHandlerWithPCS(nullptr, nullptr);
|
||||
|
||||
return (retKmt == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) {
|
||||
@@ -2894,8 +2893,9 @@ hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& sessi
|
||||
}
|
||||
|
||||
// Start the sampling session in the kernel driver
|
||||
if (HSAKMT_CALL(hsaKmtPcSamplingStart(node_id(), session.ThunkId())) == HSAKMT_STATUS_SUCCESS)
|
||||
if (driver().PcSamplingStart(node_id(), session.ThunkId()) == HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
debug_print("Failed to start PC sampling session with thunkId:%d\n", session.ThunkId());
|
||||
// Clean up if starting the session failed
|
||||
@@ -2915,8 +2915,8 @@ hsa_status_t GpuAgent::PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& sessio
|
||||
session.stop();
|
||||
|
||||
// Stop PC sampling in the kernel driver
|
||||
HSAKMT_STATUS retKmt = HSAKMT_CALL(hsaKmtPcSamplingStop(node_id(), session.ThunkId()));
|
||||
if (retKmt != HSAKMT_STATUS_SUCCESS)
|
||||
hsa_status_t ret = driver().PcSamplingStop(node_id(), session.ThunkId());
|
||||
if (ret != HSA_STATUS_SUCCESS)
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to stop PC Sampling session.");
|
||||
|
||||
// Determine the sampling method and corresponding data
|
||||
|
||||
Ссылка в новой задаче
Block a user