SWDEV-539130 - Log blit copy duration (#258)
Co-authored-by: Pengda Xie <pengda.xie@amd.com>
Dieser Commit ist enthalten in:
committet von
GitHub
Ursprung
b9fc643a56
Commit
83a10986a4
@@ -71,10 +71,8 @@ class BlitKernel : public core::Blit {
|
||||
///
|
||||
/// @note: The call will block until all AQL packets have been executed.
|
||||
///
|
||||
/// @param agent Agent passed to Initialize.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy(const core::Agent& agent) override;
|
||||
virtual hsa_status_t Destroy() override;
|
||||
|
||||
/// @brief Submit an AQL packet to perform vector copy. The call is blocking
|
||||
/// until the command execution is finished.
|
||||
@@ -189,6 +187,8 @@ class BlitKernel : public core::Blit {
|
||||
|
||||
std::map<KernelType, KernelCode> kernels_;
|
||||
|
||||
const core::Agent* agent_;
|
||||
|
||||
/// AQL queue for submitting the vector copy kernel.
|
||||
core::Queue* queue_;
|
||||
uint32_t queue_bitmask_;
|
||||
|
||||
@@ -94,10 +94,8 @@ template <bool useGCR> class BlitSdma : public BlitSdmaBase {
|
||||
///
|
||||
/// @note: The call will block until all packets have executed.
|
||||
///
|
||||
/// @param agent Agent passed to Initialize.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy(const core::Agent& agent) override;
|
||||
virtual hsa_status_t Destroy() override;
|
||||
|
||||
/// @brief Submit a linear copy command to the queue buffer.
|
||||
///
|
||||
|
||||
@@ -63,7 +63,7 @@ class Blit {
|
||||
/// @param agent Agent passed to Initialize.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Destroy(const core::Agent& agent) = 0;
|
||||
virtual hsa_status_t Destroy() = 0;
|
||||
|
||||
/// @brief Submit a linear copy command to the underlying compute device's
|
||||
/// control block. The call is blocking until the command execution is
|
||||
|
||||
@@ -551,6 +551,7 @@ BlitKernel::BlitKernel(core::Queue* queue)
|
||||
BlitKernel::~BlitKernel() {}
|
||||
|
||||
hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
|
||||
agent_ = &agent;
|
||||
queue_bitmask_ = queue_->public_handle()->size - 1;
|
||||
|
||||
bytes_written_.resize(queue_->public_handle()->size);
|
||||
@@ -561,15 +562,15 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
|
||||
return status;
|
||||
}
|
||||
|
||||
const AMD::GpuAgent& gpuAgent = static_cast<const AMD::GpuAgent&>(agent);
|
||||
const AMD::GpuAgent* gpuAgent = static_cast<const AMD::GpuAgent*>(agent_);
|
||||
kernarg_async_ = reinterpret_cast<KernelArgs*>(
|
||||
gpuAgent.system_allocator()(queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16),
|
||||
gpuAgent->system_allocator()(queue_->public_handle()->size * AlignUp(sizeof(KernelArgs), 16),
|
||||
16, core::MemoryRegion::AllocateNoFlags));
|
||||
|
||||
kernarg_async_mask_ = queue_->public_handle()->size - 1;
|
||||
|
||||
// Obtain the number of compute units in the underlying agent.
|
||||
num_cus_ = gpuAgent.properties().NumFComputeCores / 4;
|
||||
num_cus_ = gpuAgent->properties().NumFComputeCores / 4;
|
||||
|
||||
// Assemble shaders to AQL code objects.
|
||||
std::map<KernelType, const char*> kernel_names = {
|
||||
@@ -579,29 +580,29 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
|
||||
|
||||
for (auto kernel_name : kernel_names) {
|
||||
KernelCode& kernel = kernels_[kernel_name.first];
|
||||
gpuAgent.AssembleShader(kernel_name.second, AMD::GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
|
||||
gpuAgent->AssembleShader(kernel_name.second, AMD::GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
|
||||
kernel.code_buf_size_);
|
||||
}
|
||||
|
||||
if (agent.profiling_enabled()) {
|
||||
if (agent_->profiling_enabled()) {
|
||||
return EnableProfiling(true);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t BlitKernel::Destroy(const core::Agent& agent) {
|
||||
hsa_status_t BlitKernel::Destroy() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
|
||||
const AMD::GpuAgent& gpuAgent = static_cast<const AMD::GpuAgent&>(agent);
|
||||
const AMD::GpuAgent* gpuAgent = static_cast<const AMD::GpuAgent*>(agent_);
|
||||
|
||||
for (auto kernel_pair : kernels_) {
|
||||
gpuAgent.ReleaseShader(kernel_pair.second.code_buf_,
|
||||
gpuAgent->ReleaseShader(kernel_pair.second.code_buf_,
|
||||
kernel_pair.second.code_buf_size_);
|
||||
}
|
||||
|
||||
if (kernarg_async_ != NULL) {
|
||||
gpuAgent.system_deallocator()(kernarg_async_);
|
||||
gpuAgent->system_deallocator()(kernarg_async_);
|
||||
}
|
||||
|
||||
if (completion_signal_.handle != 0) {
|
||||
@@ -635,6 +636,11 @@ hsa_status_t BlitKernel::SubmitLinearCopyCommand(void* dst, const void* src,
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if(agent_->profiling_enabled()) {
|
||||
LogSignalDuration(HSA_AMD_LOG_FLAG_BLIT_KERNEL_PKTS, completion_signal_,
|
||||
"BlitKernel::SubmitLinearCopyCommand");
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ hsa_status_t BlitSdma<useGCR>::Initialize(const core::Agent& agent, bool use_xgm
|
||||
if (queue_start_addr_ == NULL) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
MAKE_NAMED_SCOPE_GUARD(cleanupOnException, [&]() { Destroy(agent); };);
|
||||
MAKE_NAMED_SCOPE_GUARD(cleanupOnException, [&]() { Destroy(); };);
|
||||
std::memset(queue_start_addr_, 0, kQueueSize);
|
||||
|
||||
bytes_written_.resize(kQueueSize);
|
||||
@@ -208,7 +208,7 @@ hsa_status_t BlitSdma<useGCR>::Initialize(const core::Agent& agent, bool use_xgm
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::Destroy(const core::Agent& agent) {
|
||||
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::Destroy() {
|
||||
// Release all allocated resources and reset them to zero.
|
||||
|
||||
if (queue_resource_.QueueId != 0) {
|
||||
|
||||
@@ -738,7 +738,7 @@ core::Blit* GpuAgent::CreateBlitSdma(bool use_xgmi, int rec_eng) {
|
||||
rec_eng = uses_rec_sdma_eng_id_mask_ || !use_xgmi ? rec_eng : -1;
|
||||
|
||||
if (sdma->Initialize(*this, use_xgmi, copy_size_override, rec_eng) != HSA_STATUS_SUCCESS) {
|
||||
sdma->Destroy(*this);
|
||||
sdma->Destroy();
|
||||
delete sdma;
|
||||
sdma = nullptr;
|
||||
}
|
||||
@@ -750,7 +750,7 @@ core::Blit* GpuAgent::CreateBlitKernel(core::Queue* queue) {
|
||||
AMD::BlitKernel* kernl = new AMD::BlitKernel(queue);
|
||||
|
||||
if (kernl->Initialize(*this) != HSA_STATUS_SUCCESS) {
|
||||
kernl->Destroy(*this);
|
||||
kernl->Destroy();
|
||||
delete kernl;
|
||||
kernl = NULL;
|
||||
}
|
||||
@@ -912,7 +912,7 @@ void GpuAgent::ReleaseResources() {
|
||||
this->Disable();
|
||||
for (auto& blit : blits_) {
|
||||
if (!blit.empty()) {
|
||||
hsa_status_t status = blit->Destroy(*this);
|
||||
hsa_status_t status = blit->Destroy();
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,6 +151,16 @@ static __forceinline unsigned long long int strtoull(const char* str,
|
||||
rocr::log_printf(__FILENAME__, __LINE__, format, ##__VA_ARGS__); \
|
||||
} while (false);
|
||||
|
||||
// Logs the start/end timestamps stored in a signal and the number of ticks
// elapsed between them, but only when log flag `flag` is set in `log_flags`.
//
//   flag   - flag index tested via hsa_flag_isset64(log_flags, flag).
//   signal - object exposing an integer `.handle` member; the handle is
//            reinterpreted as a pointer to amd_signal_t to read start_ts/end_ts.
//   msg    - C string printed as the prefix of the log line.
//
// The handle (not the signal object itself) is passed to the variadic logger,
// and every 64-bit value is cast to unsigned long long to match the %llx/%llu
// conversions, so the call is well-defined regardless of the width of `long`.
// NOTE(review): the timestamps are read as-is; the caller is presumably
// expected to invoke this only after the signal has completed - confirm.
// The trailing ';' after `while (false)` is kept to match the other logging
// macros in this file and to stay compatible with existing call sites.
#define LogSignalDuration(flag, signal, msg)                                                   \
  do {                                                                                         \
    if (hsa_flag_isset64(log_flags, flag)) {                                                   \
      const amd_signal_t* amd_signal =                                                         \
          reinterpret_cast<const amd_signal_t*>((signal).handle);                              \
      rocr::log_printf(                                                                        \
          __FILENAME__, __LINE__,                                                              \
          "%s Signal = (0x%llx), ticks start/end = %llu / %llu, Ticks elapsed = %llu", (msg),  \
          static_cast<unsigned long long>((signal).handle),                                    \
          static_cast<unsigned long long>(amd_signal->start_ts),                               \
          static_cast<unsigned long long>(amd_signal->end_ts),                                 \
          static_cast<unsigned long long>(amd_signal->end_ts - amd_signal->start_ts));         \
    }                                                                                          \
  } while (false);
|
||||
|
||||
// A macro to remove unused variable warnings
|
||||
#define UNUSED(x) (void)(x)
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren