diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index 83b83c977d..afc88a7a19 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -430,6 +430,28 @@ hsa_status_t KfdDriver::ReleaseShareableHandle(core::ShareableHandle &handle) { return HSA_STATUS_SUCCESS; } +hsa_status_t KfdDriver::SPMAcquire(uint32_t preferred_node_id) const { + if (hsaKmtSPMAcquire(preferred_node_id) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::SPMRelease(uint32_t preferred_node_id) const { + if (hsaKmtSPMRelease(preferred_node_id) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, + uint32_t* timeout, uint32_t* size_copied, + void* dest_mem_addr, bool* is_spm_data_loss) const { + if (hsaKmtSPMSetDestBuffer(preferred_node_id, size_bytes, timeout, size_copied, dest_mem_addr, + is_spm_data_loss) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; +} + void *KfdDriver::AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id, size_t size) { void *mem = nullptr; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp index f802193516..26b52cd6f3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -671,5 +671,22 @@ hsa_status_t XdnaDriver::SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uin return HSA_STATUS_SUCCESS; } +hsa_status_t XdnaDriver::SPMAcquire(uint32_t preferred_node_id) const { + 
// AIE does not support the streaming performance monitor, so the request is always rejected. + return HSA_STATUS_ERROR_INVALID_AGENT; +} + +hsa_status_t XdnaDriver::SPMRelease(uint32_t preferred_node_id) const { + // AIE does not support the streaming performance monitor, so the request is always rejected. + return HSA_STATUS_ERROR_INVALID_AGENT; +} + +hsa_status_t XdnaDriver::SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, + uint32_t* timeout, uint32_t* size_copied, + void* dest_mem_addr, bool* is_spm_data_loss) const { + // AIE does not support the streaming performance monitor, so the request is always rejected. + return HSA_STATUS_ERROR_INVALID_AGENT; +} + } // namespace AMD } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h index 8aaf11f10c..853e305e6c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h @@ -301,7 +301,7 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { __forceinline uint32_t node_id() const { return node_id_; } // @brief Returns the driver associated with this agent. - __forceinline Driver &driver() { return *driver_; } + __forceinline Driver& driver() const { return *driver_; } // @brief Getter for profiling_enabled_. 
__forceinline bool profiling_enabled() const { return profiling_enabled_; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h index 256805fa8e..adf53e6785 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h @@ -109,7 +109,13 @@ public: size_t size) override; hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override; -private: + hsa_status_t SPMAcquire(uint32_t preferred_node_id) const override; + hsa_status_t SPMRelease(uint32_t preferred_node_id) const override; + hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, uint32_t* timeout, + uint32_t* size_copied, void* dest_mem_addr, + bool* is_spm_data_loss) const override; + + private: /// @brief Allocate agent accessible memory (system / local memory). static void *AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id, size_t size); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h index 6726c5aba1..b9c4cd68aa 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h @@ -174,10 +174,16 @@ public: size_t size) override; hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) override; - // @brief Submits num_pkts packets in a command chain to the XDNA driver + /// @brief Submits num_pkts packets in a command chain to the XDNA driver hsa_status_t SubmitCmdChain(hsa_amd_aie_ert_packet_t* first_pkt, uint32_t num_pkts, uint32_t num_operands, uint32_t hw_ctx_handle); + hsa_status_t SPMAcquire(uint32_t preferred_node_id) const override; + hsa_status_t SPMRelease(uint32_t preferred_node_id) const override; + hsa_status_t SPMSetDestBuffer(uint32_t 
preferred_node_id, uint32_t size_bytes, uint32_t* timeout, + uint32_t* size_copied, void* dest_mem_addr, + bool* is_spm_data_loss) const override; + private: hsa_status_t QueryDriverVersion(); /// @brief Allocate device accesible heap space. diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h index 024b33a5c0..52373bd829 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h @@ -193,6 +193,24 @@ public: virtual hsa_status_t ReleaseShareableHandle(core::ShareableHandle &handle) = 0; + /// @brief Acquire a streaming performance monitor on an agent. + /// @param[in] preferred_node_id Node ID of the preferred agent. + virtual hsa_status_t SPMAcquire(uint32_t preferred_node_id) const = 0; + /// @brief Release a streaming performance monitor on an agent. + /// @param[in] preferred_node_id Node ID of the preferred agent. + virtual hsa_status_t SPMRelease(uint32_t preferred_node_id) const = 0; + /// @brief Set up the destination user-mode buffer for streaming performance monitor data. + /// @param[in] preferred_node_id Node ID of the preferred agent. + /// @param[in] size_bytes Size of the destination buffer in bytes. + /// @param[in,out] timeout Timeout in milliseconds. + /// @param[out] size_copied Size of data copied in bytes. + /// @param[in] dest_mem_addr Destination address for streaming performance data. Set to NULL to + /// stop copy on previous buffer. + /// @param[out] is_spm_data_loss Data was lost if true. + virtual hsa_status_t SPMSetDestBuffer(uint32_t preferred_node_id, uint32_t size_bytes, + uint32_t* timeout, uint32_t* size_copied, + void* dest_mem_addr, bool* is_spm_data_loss) const = 0; + /// Unique identifier for supported kernel-mode drivers. 
const DriverType kernel_driver_type_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index f49df6b73c..be5a5dcaa1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -1218,11 +1218,11 @@ hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent) { TRY; IS_OPEN(); const core::Agent* agent = core::Agent::Convert(preferred_agent); + // Reject null, invalid, and non-GPU agents: the SPM API is currently GPU-only. if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) return HSA_STATUS_ERROR_INVALID_AGENT; - if (hsaKmtSPMAcquire(agent->node_id()) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; - return HSA_STATUS_SUCCESS; + return agent->driver().SPMAcquire(agent->node_id()); CATCH; } @@ -1232,12 +1232,11 @@ hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent) { IS_OPEN(); const core::Agent* agent = core::Agent::Convert(preferred_agent); + // Reject null, invalid, and non-GPU agents: the SPM API is currently GPU-only. if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) return HSA_STATUS_ERROR_INVALID_AGENT; - if (hsaKmtSPMRelease(agent->node_id()) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; - - return HSA_STATUS_SUCCESS; + return agent->driver().SPMRelease(agent->node_id()); CATCH; } @@ -1249,14 +1248,12 @@ hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t siz IS_OPEN(); const core::Agent* agent = core::Agent::Convert(preferred_agent); + // Reject null, invalid, and non-GPU agents: the SPM API is currently GPU-only. 
if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) return HSA_STATUS_ERROR_INVALID_AGENT; - if (hsaKmtSPMSetDestBuffer(agent->node_id(), size_in_bytes, timeout, size_copied, dest, - is_data_loss) != HSAKMT_STATUS_SUCCESS) - return HSA_STATUS_ERROR; - - return HSA_STATUS_SUCCESS; + return agent->driver().SPMSetDestBuffer(agent->node_id(), size_in_bytes, timeout, size_copied, + dest, is_data_loss); CATCH; }