Add Stream Performance Monitor (SPM) APIs

Change-Id: I0d48782887814ef245b7e0182e2d5570aa8c3f50
This commit is contained in:
David Yat Sin
2022-11-23 22:56:26 +00:00
parent ecdebef0b9
commit 6bfe57aeb2
8 ha cambiato i file con 139 aggiunte e 1 eliminazioni
+1 -1
Vedi File
@@ -87,7 +87,7 @@ if (ROCM_CCACHE_BUILD)
endif() # if (ROCM_CCACHE_BUILD)
## Get version strings
get_version ( "1.7.0" )
get_version ( "1.8.0" )
if ( ${ROCM_PATCH_VERSION} )
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
@@ -1178,6 +1178,24 @@ hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agen
return amdExtTable->hsa_amd_svm_prefetch_async_fn(ptr, size, agent, num_dep_signals, dep_signals, completion_signal);
}
// Exported entry point for hsa_amd_spm_acquire: looks up the matching slot in the
// AMD extension table (amdExtTable) and forwards the call unchanged.
hsa_status_t HSA_API hsa_amd_spm_acquire(hsa_agent_t agent) {
  const auto acquire_fn = amdExtTable->hsa_amd_spm_acquire_fn;
  return acquire_fn(agent);
}
// Exported entry point for hsa_amd_spm_release: looks up the matching slot in the
// AMD extension table (amdExtTable) and forwards the call unchanged.
hsa_status_t HSA_API hsa_amd_spm_release(hsa_agent_t agent) {
  const auto release_fn = amdExtTable->hsa_amd_spm_release_fn;
  return release_fn(agent);
}
// Exported entry point for hsa_amd_spm_set_dest_buffer: looks up the matching slot in
// the AMD extension table (amdExtTable) and forwards every argument unchanged.
hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* timeout,
                                                 uint32_t* size_copied, void* dest,
                                                 bool* is_data_loss) {
  const auto set_dest_buffer_fn = amdExtTable->hsa_amd_spm_set_dest_buffer_fn;
  return set_dest_buffer_fn(agent, size, timeout, size_copied, dest, is_data_loss);
}
// Tools only table interfaces.
namespace rocr {
@@ -265,6 +265,17 @@ hsa_status_t HSA_API hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agen
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);
// Runtime-side counterpart of the public hsa_amd_spm_acquire extension API.
// HsaApiTable::UpdateAmdExts installs this function as hsa_amd_spm_acquire_fn.
hsa_status_t HSA_API hsa_amd_spm_acquire(hsa_agent_t agent);
// Runtime-side counterpart of the public hsa_amd_spm_release extension API.
// HsaApiTable::UpdateAmdExts installs this function as hsa_amd_spm_release_fn.
hsa_status_t HSA_API hsa_amd_spm_release(hsa_agent_t agent);
// Runtime-side counterpart of the public hsa_amd_spm_set_dest_buffer extension API.
// HsaApiTable::UpdateAmdExts installs this function as hsa_amd_spm_set_dest_buffer_fn.
hsa_status_t HSA_API hsa_amd_spm_set_dest_buffer(hsa_agent_t agent, size_t size, uint32_t* timeout,
uint32_t* size_copied, void* dest,
bool* is_data_loss);
} // namespace amd
} // namespace rocr
@@ -395,6 +395,9 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_svm_attributes_set_fn = AMD::hsa_amd_svm_attributes_set;
amd_ext_api.hsa_amd_svm_attributes_get_fn = AMD::hsa_amd_svm_attributes_get;
amd_ext_api.hsa_amd_svm_prefetch_async_fn = AMD::hsa_amd_svm_prefetch_async;
amd_ext_api.hsa_amd_spm_acquire_fn = AMD::hsa_amd_spm_acquire;
amd_ext_api.hsa_amd_spm_release_fn = AMD::hsa_amd_spm_release;
amd_ext_api.hsa_amd_spm_set_dest_buffer_fn = AMD::hsa_amd_spm_set_dest_buffer;
}
void LoadInitialHsaApiTable() {
@@ -1069,5 +1069,52 @@ hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agen
CATCH;
}
// Acquires the Stream Performance Monitor for the GPU named by preferred_agent.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the handle does not convert to a valid
// AMD GPU agent, HSA_STATUS_ERROR when hsaKmtSPMAcquire does not report success, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent) {
  TRY;
  IS_OPEN();

  const core::Agent* gpu = core::Agent::Convert(preferred_agent);
  const bool is_gpu_agent =
      gpu != NULL && gpu->IsValid() && gpu->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // The request is addressed by the agent's node id.
  const bool acquired = (hsaKmtSPMAcquire(gpu->node_id()) == HSAKMT_STATUS_SUCCESS);
  return acquired ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;

  CATCH;
}
// Releases the Stream Performance Monitor for the GPU named by preferred_agent.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the handle does not convert to a valid
// AMD GPU agent, HSA_STATUS_ERROR when hsaKmtSPMRelease does not report success, and
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent) {
  TRY;
  IS_OPEN();

  const core::Agent* gpu = core::Agent::Convert(preferred_agent);
  const bool is_gpu_agent =
      gpu != NULL && gpu->IsValid() && gpu->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // The request is addressed by the agent's node id.
  const bool released = (hsaKmtSPMRelease(gpu->node_id()) == HSAKMT_STATUS_SUCCESS);
  return released ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;

  CATCH;
}
// Hands a destination buffer for SPM data to hsaKmtSPMSetDestBuffer for the GPU named
// by preferred_agent. The size and all pointer arguments are passed through untouched.
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT when the handle does not convert to a valid
// AMD GPU agent, HSA_STATUS_ERROR when hsaKmtSPMSetDestBuffer does not report success,
// and HSA_STATUS_SUCCESS otherwise.
hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes,
                                         uint32_t* timeout, uint32_t* size_copied, void* dest,
                                         bool* is_data_loss) {
  TRY;
  IS_OPEN();

  const core::Agent* gpu = core::Agent::Convert(preferred_agent);
  const bool is_gpu_agent =
      gpu != NULL && gpu->IsValid() && gpu->device_type() == core::Agent::kAmdGpuDevice;
  if (!is_gpu_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // The request is addressed by the agent's node id.
  const bool buffer_set =
      (hsaKmtSPMSetDestBuffer(gpu->node_id(), size_in_bytes, timeout, size_copied, dest,
                              is_data_loss) == HSAKMT_STATUS_SUCCESS);
  return buffer_set ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;

  CATCH;
}
} // namespace amd
} // namespace rocr
+3
Vedi File
@@ -226,6 +226,9 @@ global:
hsa_amd_svm_attributes_set;
hsa_amd_svm_attributes_get;
hsa_amd_svm_prefetch_async;
hsa_amd_spm_acquire;
hsa_amd_spm_release;
hsa_amd_spm_set_dest_buffer;
local:
*;
@@ -186,6 +186,9 @@ struct AmdExtTable {
decltype(hsa_amd_svm_attributes_set)* hsa_amd_svm_attributes_set_fn;
decltype(hsa_amd_svm_attributes_get)* hsa_amd_svm_attributes_get_fn;
decltype(hsa_amd_svm_prefetch_async)* hsa_amd_svm_prefetch_async_fn;
decltype(hsa_amd_spm_acquire)* hsa_amd_spm_acquire_fn;
decltype(hsa_amd_spm_release)* hsa_amd_spm_release_fn;
decltype(hsa_amd_spm_set_dest_buffer)* hsa_amd_spm_set_dest_buffer_fn;
decltype(hsa_amd_queue_cu_get_mask)* hsa_amd_queue_cu_get_mask_fn;
};
+53
Vedi File
@@ -2411,6 +2411,59 @@ hsa_status_t hsa_amd_svm_prefetch_async(void* ptr, size_t size, hsa_agent_t agen
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal);
/**
 * @brief Acquire the Stream Performance Monitor (SPM) on an agent.
 *
 * Acquires exclusive use of SPM on @p preferred_agent.
 * Use hsa_amd_spm_set_dest_buffer to give KFD a destination buffer, which starts
 * recording, and to retrieve the recorded data.
 *
 * @param[in] preferred_agent Agent on which to acquire SPM
 *
 * @retval ::HSA_STATUS_SUCCESS SPM was acquired.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p preferred_agent is not a valid AMD GPU
 * agent.
 *
 * @retval ::HSA_STATUS_ERROR The underlying acquire request failed.
 */
hsa_status_t hsa_amd_spm_acquire(hsa_agent_t preferred_agent);
/**
 * @brief Release the Stream Performance Monitor (SPM) on an agent.
 *
 * Releases exclusive use of SPM on @p preferred_agent. This stops KFD from writing SPM
 * data. If a destination buffer is set, the data in that buffer is available to the
 * user when this function returns.
 *
 * @param[in] preferred_agent Agent on which to release SPM
 *
 * @retval ::HSA_STATUS_SUCCESS SPM was released.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p preferred_agent is not a valid AMD GPU
 * agent.
 *
 * @retval ::HSA_STATUS_ERROR The underlying release request failed.
 */
hsa_status_t hsa_amd_spm_release(hsa_agent_t preferred_agent);
/**
 * @brief Set the current destination user-mode buffer for stream performance
 * counter data.
 *
 * KFD will start writing SPM data into the destination buffer. KFD will continue
 * to copy data into the current destination buffer until any of the following
 * functions is called:
 * - hsa_amd_spm_release
 * - hsa_amd_spm_set_dest_buffer with @p dest set to NULL
 * - hsa_amd_spm_set_dest_buffer with @p dest set to a new buffer
 *
 * If @p timeout is non-zero, the call waits for up to @p timeout ms for the previous
 * buffer to be filled. If the previous buffer is filled before the timeout expires,
 * @p timeout is updated with the time remaining. If the timeout is exceeded, the
 * function copies any partial data available into the previous user buffer and
 * returns success.
 * The user should not access destination data while KFD is copying data.
 * If the previous destination buffer was full, then the @p is_data_loss flag is set.
 * @p dest is CPU-accessible memory. It could be malloc'ed memory or host-allocated
 * memory.
 *
 * @param[in] preferred_agent Agent on which to set the destination buffer
 *
 * @param[in] size_in_bytes Size of the buffer, in bytes
 *
 * @param[in,out] timeout Timeout in milliseconds
 *
 * @param[out] size_copied Number of bytes copied
 *
 * @param[in] dest Destination address. Set to NULL to stop copying into the previous
 * buffer
 *
 * @param[out] is_data_loss True if data was lost
 *
 * @retval ::HSA_STATUS_SUCCESS The destination buffer was set.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT @p preferred_agent is not a valid AMD GPU
 * agent.
 *
 * @retval ::HSA_STATUS_ERROR The underlying set-destination-buffer request failed.
 */
hsa_status_t hsa_amd_spm_set_dest_buffer(hsa_agent_t preferred_agent, size_t size_in_bytes,
uint32_t* timeout, uint32_t* size_copied, void* dest,
bool* is_data_loss);
#ifdef __cplusplus
} // end extern "C" block
#endif