diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index 178c6645d0..0181c97cd7 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -87,7 +87,7 @@ if (ROCM_CCACHE_BUILD) endif() # if (ROCM_CCACHE_BUILD) ## Get version strings -get_version ( "1.7.0" ) +get_version ( "1.8.0" ) if ( ${ROCM_PATCH_VERSION} ) set ( VERSION_PATCH ${ROCM_PATCH_VERSION}) endif() diff --git a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index 7ea5966ed0..b90e3f072f 100644 --- a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -1178,6 +1178,24 @@ hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agen return amdExtTable->hsa_amd_svm_prefetch_async_fn(ptr, size, agent, num_dep_signals, dep_signals, completion_signal); } +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_acquire(hsa_agent_t agent) { + return amdExtTable->hsa_amd_spm_acquire_fn(agent); +} + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_release(hsa_agent_t agent) { + return amdExtTable->hsa_amd_spm_release_fn(agent); +} + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* timeout, + uint32_t* size_copied, void* dest, + bool* is_data_loss) { + return amdExtTable->hsa_amd_spm_set_dest_buffer_fn(agent, size, timeout, size_copied, dest, + is_data_loss); +} + // Tools only table interfaces. 
namespace rocr { diff --git a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index dfdd078947..a0ab357e01 100644 --- a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -265,6 +265,17 @@ hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agen uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal); +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_acquire(hsa_agent_t agent); + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_release(hsa_agent_t agent); + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* timeout, + uint32_t* size_copied, void* dest, + bool* is_data_loss); + } // namespace amd } // namespace rocr diff --git a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index ab6a1f31b9..bc3257540c 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -395,6 +395,9 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_svm_attributes_set_fn = AMD::hsa_amd_svm_attributes_set; amd_ext_api.hsa_amd_svm_attributes_get_fn = AMD::hsa_amd_svm_attributes_get; amd_ext_api.hsa_amd_svm_prefetch_async_fn = AMD::hsa_amd_svm_prefetch_async; + amd_ext_api.hsa_amd_spm_acquire_fn = AMD::hsa_amd_spm_acquire; + amd_ext_api.hsa_amd_spm_release_fn = AMD::hsa_amd_spm_release; + amd_ext_api.hsa_amd_spm_set_dest_buffer_fn = AMD::hsa_amd_spm_set_dest_buffer; } void LoadInitialHsaApiTable() { diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 2d39933577..5555546060 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -1069,5 +1069,52 @@ hsa_status_t 
hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agen CATCH; } +hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent) { + TRY; + IS_OPEN(); + const core::Agent* agent = core::Agent::Convert(preferred_agent); + if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) + return HSA_STATUS_ERROR_INVALID_AGENT; + + if (hsaKmtSPMAcquire(agent->node_id()) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + return HSA_STATUS_SUCCESS; + + CATCH; +} + +hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent) { + TRY; + IS_OPEN(); + + const core::Agent* agent = core::Agent::Convert(preferred_agent); + if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) + return HSA_STATUS_ERROR_INVALID_AGENT; + + if (hsaKmtSPMRelease(agent->node_id()) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; + + CATCH; +} + +hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes, + uint32_t* timeout, uint32_t* size_copied, void* dest, + bool* is_data_loss) { + TRY; + IS_OPEN(); + + const core::Agent* agent = core::Agent::Convert(preferred_agent); + if (agent == NULL || !agent->IsValid() || agent->device_type() != core::Agent::kAmdGpuDevice) + return HSA_STATUS_ERROR_INVALID_AGENT; + + if (hsaKmtSPMSetDestBuffer(agent->node_id(), size_in_bytes, timeout, size_copied, dest, + is_data_loss) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; + + CATCH; +} + } // namespace amd } // namespace rocr diff --git a/runtime/hsa-runtime/hsacore.so.def b/runtime/hsa-runtime/hsacore.so.def index 16aa30b3e3..9a57be2936 100644 --- a/runtime/hsa-runtime/hsacore.so.def +++ b/runtime/hsa-runtime/hsacore.so.def @@ -226,6 +226,9 @@ global: hsa_amd_svm_attributes_set; hsa_amd_svm_attributes_get; hsa_amd_svm_prefetch_async; + hsa_amd_spm_acquire; + hsa_amd_spm_release; + hsa_amd_spm_set_dest_buffer; local: *; diff --git 
a/runtime/hsa-runtime/inc/hsa_api_trace.h b/runtime/hsa-runtime/inc/hsa_api_trace.h index 451204f412..f02322215f 100644 --- a/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -186,6 +186,9 @@ struct AmdExtTable { decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn; decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn; decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn; + decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn; + decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn; + decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn; decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn; }; diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h index 38fcb8aad0..527f25d6d5 100644 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -2411,6 +2411,59 @@ hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agen uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal); +/** + * @brief Acquire Stream Performance Monitor on an agent + * + * Acquire exclusive use of SPM on @p preferred_agent. + * See hsa_amd_spm_set_dest_buffer to provide a destination buffer to KFD to start recording and + * retrieve the SPM data. + * @param[in] preferred_agent Agent on which to acquire SPM + */ +hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent); + +/** + * @brief Release Stream Performance Monitor on an agent + * + * Release exclusive use of SPM on @p preferred_agent. This will stop KFD from writing SPM data. + * If a destination buffer is set, then data in the destination buffer is available to the user + * when this function returns.
+ * + * @param[in] preferred_agent Agent on which to release SPM + */ +hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent); + +/** + * @brief Set up the current destination user mode buffer for stream performance + * counter data. KFD will start writing SPM data into the destination buffer. KFD will continue + * to copy data into the current destination buffer until any of the following functions are called + * - hsa_amd_spm_release + * - hsa_amd_spm_set_dest_buffer with dest set to NULL + * - hsa_amd_spm_set_dest_buffer with dest set to a new buffer + * + * If @p timeout is non-0, the call will wait for up to @p timeout ms for the previous + * buffer to be filled. If the previous buffer is filled before the timeout, @p timeout + * will be updated with the time remaining. If the timeout is exceeded, the function + * copies any partial data available into the previous user buffer and returns success. + * The user should not access the destination data while KFD is copying data. + * If the previous destination buffer was full, then the @p is_data_loss flag is set. + * @p dest is CPU accessible memory. It could be malloc'ed memory or host allocated memory. + * + * @param[in] preferred_agent Agent on which to set the dest buffer + * + * @param[in] size_in_bytes size of the buffer + * + * @param[in,out] timeout timeout in milliseconds + * + * @param[out] size_copied number of bytes copied + * + * @param[in] dest destination address. Set to NULL to stop copy on previous buffer + * + * @param[out] is_data_loss true if data was lost + */ +hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes, + uint32_t* timeout, uint32_t* size_copied, void* dest, + bool* is_data_loss); + #ifdef __cplusplus } // end extern "C" block #endif