diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
index 2e2546bf10..ba4700340d 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
@@ -730,5 +730,64 @@ hsa_status_t KfdDriver::ReturnAsanHeaderPage(void* mem) const {
   return HSA_STATUS_SUCCESS;
 }
 
+hsa_status_t KfdDriver::PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
+                                                    uint32_t sample_info_sz,
+                                                    uint32_t* sz_needed) const {
+  HSAKMT_STATUS status = HSAKMT_CALL(
+      hsaKmtPcSamplingQueryCapabilities(node_id, sample_info, sample_info_sz, sz_needed));
+  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
+    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
+  }
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    return HSA_STATUS_ERROR;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t KfdDriver::PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
+                                         uint32_t* trace_id) const {
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingCreate(node_id, sample_info, trace_id));
+  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
+    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
+  }
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    return HSA_STATUS_ERROR;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t KfdDriver::PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingDestroy(node_id, trace_id));
+  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
+    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
+  }
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    return HSA_STATUS_ERROR;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t KfdDriver::PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingStart(node_id, trace_id));
+  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
+    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
+  }
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    return HSA_STATUS_ERROR;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t KfdDriver::PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
+  HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtPcSamplingStop(node_id, trace_id));
+  if (status == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) {
+    return static_cast<hsa_status_t>(HSA_STATUS_ERROR_RESOURCE_BUSY);
+  }
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    return HSA_STATUS_ERROR;
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
 }  // namespace AMD
 }  // namespace rocr
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
index 7f3f59f8d4..6a78aac6de 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h
@@ -141,6 +141,14 @@ public:
                                        uint64_t* size) const override;
   hsa_status_t ReplaceAsanHeaderPage(void* mem) const override;
   hsa_status_t ReturnAsanHeaderPage(void* mem) const override;
+  hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
+                                           uint32_t sample_info_sz,
+                                           uint32_t* sz_needed) const override;
+  hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
+                                uint32_t* trace_id) const override;
+  hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const override;
+  hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const override;
+  hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const override;
 
   hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
index 151edf6a98..e2265126bc 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h
@@ -386,6 +386,57 @@ public:
     return HSA_STATUS_ERROR_INVALID_AGENT;
   }
 
+  /// @brief Queries the PC sampling capabilities.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[in] sample_info Pointer to the sample information
+  /// @param[in] sample_info_sz Size of the sample information
+  /// @param[out] sz_needed Size of the sample information needed
+  /// @return HSA_STATUS_SUCCESS if the PC sampling capabilities were successfully queried, or an
+  /// error code.
+  virtual hsa_status_t PcSamplingQueryCapabilities(uint32_t node_id, void* sample_info,
+                                                   uint32_t sample_info_sz,
+                                                   uint32_t* sz_needed) const {
+    return HSA_STATUS_ERROR_INVALID_AGENT;
+  }
+
+  /// @brief Creates a PC sampling session.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[in] sample_info Pointer to the sample information
+  /// @param[out] trace_id Pointer to the trace ID
+  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully created, or an error
+  /// code.
+  virtual hsa_status_t PcSamplingCreate(uint32_t node_id, HsaPcSamplingInfo* sample_info,
+                                        uint32_t* trace_id) const {
+    return HSA_STATUS_ERROR_INVALID_AGENT;
+  }
+
+  /// @brief Destroys a PC sampling session.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[in] trace_id Trace ID of the PC sampling session
+  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully destroyed, or an error
+  /// code.
+  virtual hsa_status_t PcSamplingDestroy(uint32_t node_id, uint32_t trace_id) const {
+    return HSA_STATUS_ERROR_INVALID_AGENT;
+  }
+
+  /// @brief Starts a PC sampling session.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[in] trace_id Trace ID of the PC sampling session
+  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully started, or an error
+  /// code.
+  virtual hsa_status_t PcSamplingStart(uint32_t node_id, uint32_t trace_id) const {
+    return HSA_STATUS_ERROR_INVALID_AGENT;
+  }
+
+  /// @brief Stops a PC sampling session.
+  /// @param[in] node_id Node ID of the agent
+  /// @param[in] trace_id Trace ID of the PC sampling session
+  /// @return HSA_STATUS_SUCCESS if the PC sampling session was successfully stopped, or an error
+  /// code.
+  virtual hsa_status_t PcSamplingStop(uint32_t node_id, uint32_t trace_id) const {
+    return HSA_STATUS_ERROR_INVALID_AGENT;
+  }
+
   /// Unique identifier for supported kernel-mode drivers.
   const DriverType kernel_driver_type_;
 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
index 1b6ecb8b86..31b2f11c81 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -2553,14 +2553,16 @@ hsa_status_t GpuAgent::PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configura
     return HSA_STATUS_ERROR;
 
   // First query to get size of list needed
-  HSAKMT_STATUS ret = HSAKMT_CALL(hsaKmtPcSamplingQueryCapabilities(node_id(), NULL, 0, &size));
-  if (ret != HSAKMT_STATUS_SUCCESS || size == 0) return HSA_STATUS_ERROR;
+  hsa_status_t ret = driver().PcSamplingQueryCapabilities(node_id(), NULL, 0, &size);
+  if (ret != HSA_STATUS_SUCCESS) return ret;
+  // A successful query that reports no configurations remains an error, as before the refactor.
+  if (size == 0) return HSA_STATUS_ERROR;
 
   std::vector<HsaPcSamplingInfo> sampleInfoList(size);
-  ret = HSAKMT_CALL(hsaKmtPcSamplingQueryCapabilities(node_id(), sampleInfoList.data(), sampleInfoList.size(),
-                                                      &size));
+  ret = driver().PcSamplingQueryCapabilities(node_id(), sampleInfoList.data(),
+                                             sampleInfoList.size(), &size);
 
-  if (ret != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR;
+  if (ret != HSA_STATUS_SUCCESS) return ret;
 
   for (uint32_t i = 0; i < size; i++) {
     hsa_ven_amd_pcs_configuration_t hsaPcSampling;
@@ -2586,10 +2588,9 @@ hsa_status_t GpuAgent::PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& sess
 
   // Pass the sampling information to the kernel driver to create PC
   // sampling session.
-  HSAKMT_STATUS retkmt = HSAKMT_CALL(hsaKmtPcSamplingCreate(node_id(), &sampleInfo, &thunkId));
-  if (retkmt != HSAKMT_STATUS_SUCCESS) {
-    return (retkmt == HSAKMT_STATUS_KERNEL_ALREADY_OPENED) ? (hsa_status_t)HSA_STATUS_ERROR_RESOURCE_BUSY
-                                                           : HSA_STATUS_ERROR;
+  ret = driver().PcSamplingCreate(node_id(), &sampleInfo, &thunkId);
+  if (ret != HSA_STATUS_SUCCESS) {
+    return ret;
   }
   debug_print("Created PC sampling session with thunkId:%d\n", thunkId);
 
@@ -2795,7 +2796,7 @@ hsa_status_t GpuAgent::PcSamplingCreateFromId(HsaPcSamplingTraceId ioctlId,
 hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) {
   if (PcSamplingStop(session) != HSA_STATUS_SUCCESS) return HSA_STATUS_ERROR;
 
-  HSAKMT_STATUS retKmt = HSAKMT_CALL(hsaKmtPcSamplingDestroy(node_id(), session.ThunkId()));
+  hsa_status_t ret = driver().PcSamplingDestroy(node_id(), session.ThunkId());
 
   hsa_ven_amd_pcs_method_kind_t sampling_method = session.method();
   pcs_data_t* pcs_data = nullptr;
@@ -2827,7 +2828,7 @@ hsa_status_t GpuAgent::PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& ses
   // Update the trap handler to clear any associated device data
   UpdateTrapHandlerWithPCS(nullptr, nullptr);
 
-  return (retKmt == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
+  return ret;
 }
 
 hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) {
@@ -2894,8 +2895,9 @@ hsa_status_t GpuAgent::PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& sessi
   }
 
   // Start the sampling session in the kernel driver
-  if (HSAKMT_CALL(hsaKmtPcSamplingStart(node_id(), session.ThunkId())) == HSAKMT_STATUS_SUCCESS)
+  if (driver().PcSamplingStart(node_id(), session.ThunkId()) == HSA_STATUS_SUCCESS) {
     return HSA_STATUS_SUCCESS;
+  }
 
   debug_print("Failed to start PC sampling session with thunkId:%d\n", session.ThunkId());
   // Clean up if starting the session failed
@@ -2915,8 +2917,8 @@ hsa_status_t GpuAgent::PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& sessio
   session.stop();
 
   // Stop PC sampling in the kernel driver
-  HSAKMT_STATUS retKmt = HSAKMT_CALL(hsaKmtPcSamplingStop(node_id(), session.ThunkId()));
-  if (retKmt != HSAKMT_STATUS_SUCCESS)
+  hsa_status_t ret = driver().PcSamplingStop(node_id(), session.ThunkId());
+  if (ret != HSA_STATUS_SUCCESS)
     throw AMD::hsa_exception(HSA_STATUS_ERROR, "Failed to stop PC Sampling session.");
 
   // Determine the sampling method and corresponding data