From 867ae7b2dd941fcc63cf693f01bbd8392e83b2cc Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 9 Apr 2021 08:06:42 +0000 Subject: [PATCH] concurrent: enable PmcStopper to end perf counting. Change-Id: I89785277678141e29349e162df10203787050643 [ROCm/rocprofiler commit: c4828a9de0ae759d8031f8f61ea1e9f0a8cbd2c5] --- projects/rocprofiler/src/core/context.h | 5 +---- projects/rocprofiler/src/core/gpu_command.h | 6 ++++++ .../rocprofiler/src/core/intercept_queue.h | 5 ----- projects/rocprofiler/src/core/profile.h | 2 +- projects/rocprofiler/src/core/rocprofiler.cpp | 20 ++++++++----------- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index f3c2f2a805..a8026dd3f2 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -345,8 +345,6 @@ class Context { // Concurrent profiling mode static bool k_concurrent_; - // Packets to stop the profiling - static pkt_vector_t stop_packets_; private: Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info, @@ -648,8 +646,7 @@ class Context { }; #define CONTEXT_INSTANTIATE() \ - bool rocprofiler::Context::k_concurrent_ = false; \ - std::vector rocprofiler::Context::stop_packets_{}; + bool rocprofiler::Context::k_concurrent_ = false; } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/gpu_command.h b/projects/rocprofiler/src/core/gpu_command.h index db2ddb8c19..e8439c4a68 100644 --- a/projects/rocprofiler/src/core/gpu_command.h +++ b/projects/rocprofiler/src/core/gpu_command.h @@ -26,6 +26,7 @@ THE SOFTWARE. 
#include #include "core/types.h" +#include "util/exception.h" #include "util/hsa_rsrc_factory.h" namespace rocprofiler { @@ -45,7 +46,12 @@ static inline size_t IssueGpuCommand(gpu_cmd_op_t op, hsa_queue_t* queue) { packet_t* command; const size_t size = GetGpuCommand(op, agent_info, &command); + hsa_status_t status = hsa_signal_create(1, 0, NULL, &(command->completion_signal)); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "signal_create " << std::hex << status); rocprofiler::util::HsaRsrcFactory::Instance().Submit(queue, command, size); + rocprofiler::util::HsaRsrcFactory::Instance().SignalWait(command->completion_signal, 1); + status = hsa_signal_destroy(command->completion_signal); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "signal_destroy " << std::hex << status); return HSA_STATUS_SUCCESS; } diff --git a/projects/rocprofiler/src/core/intercept_queue.h b/projects/rocprofiler/src/core/intercept_queue.h index 826420dc51..e639ba9d0c 100644 --- a/projects/rocprofiler/src/core/intercept_queue.h +++ b/projects/rocprofiler/src/core/intercept_queue.h @@ -369,11 +369,6 @@ class InterceptQueue { packets.insert(packets.end(), *packet); // Read at kernel end packets.insert(packets.end(), mid, read_vector.end()); - - // Save the stop packets for eventual PmcStopper - if (Context::stop_packets_.empty()) { - Context::stop_packets_.insert(Context::stop_packets_.end(), stop_vector.begin(), stop_vector.end()); - } } if (tracker_entry != NULL) { diff --git a/projects/rocprofiler/src/core/profile.h b/projects/rocprofiler/src/core/profile.h index 09ad26445f..609c811dd2 100644 --- a/projects/rocprofiler/src/core/profile.h +++ b/projects/rocprofiler/src/core/profile.h @@ -137,7 +137,7 @@ class Profile { virtual void Insert(const profile_info_t& info) { info_vector_.push_back(info.rinfo); } - void SetConcurrent(profile_t* profile) { + static void SetConcurrent(profile_t* profile) { // Check whether conconcurrent has been set for (const parameter_t* p = 
profile->parameters; p < (profile->parameters + profile->parameter_count); ++p) { diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp index e79b93940e..bbb97e3fbe 100644 --- a/projects/rocprofiler/src/core/rocprofiler.cpp +++ b/projects/rocprofiler/src/core/rocprofiler.cpp @@ -247,26 +247,22 @@ void PmcStopper() { // Create queue hsa_queue_t* queue; - hsa_status_t status = rocprofiler::CreateQueuePro(agent_info->dev_id, 1, - HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue); - if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "CreateQueuePro (" - << gpu_id << ") " << std::hex << status); + const bool ret = rsrc->CreateQueue(agent_info, 10, &queue); + if (ret != true) EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue(" << gpu_id << ")"); - // Submit packets - for (auto& pkt: Context::stop_packets_) { - rsrc->Submit(queue, &pkt); - // Wait for stop packet to complete - rsrc->SignalWaitRestore(pkt.completion_signal, 1); - } + // Issue PMC-disable GPU command + IssueGpuCommand(PMC_DISABLE_GPU_CMD_OP, agent_info, queue); - hsa_queue_destroy(queue); + rsrc->HsaApi()->hsa_queue_destroy(queue); } } // Unload profiling tool librray void UnloadTool() { ONLOAD_TRACE("tool handle(" << tool_handle << ")"); - //if (Context::k_concurrent_) PmcStopper(); + + if (Context::k_concurrent_) PmcStopper(); + if (tool_handle) { tool_handler_t handler = reinterpret_cast(dlsym(tool_handle, "OnUnloadTool")); if (handler == NULL) {