concurrent: enable PmcStopper to end perf counting.

Change-Id: I89785277678141e29349e162df10203787050643


[ROCm/rocprofiler commit: c4828a9de0]
This commit is contained in:
Evgeny
2021-04-09 08:06:42 +00:00
parent c701f9705c
commit 867ae7b2dd
5 changed files with 16 additions and 22 deletions
+1 -4
View File
@@ -345,8 +345,6 @@ class Context {
// Concurrent profiling mode
static bool k_concurrent_;
// Packets to stop the profiling
static pkt_vector_t stop_packets_;
private:
Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_feature_t* info,
@@ -648,8 +646,7 @@ class Context {
};
#define CONTEXT_INSTANTIATE() \
bool rocprofiler::Context::k_concurrent_ = false; \
std::vector<hsa_ext_amd_aql_pm4_packet_t> rocprofiler::Context::stop_packets_{};
bool rocprofiler::Context::k_concurrent_ = false;
} // namespace rocprofiler
@@ -26,6 +26,7 @@ THE SOFTWARE.
#include <hsa.h>
#include "core/types.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
@@ -45,7 +46,12 @@ static inline size_t IssueGpuCommand(gpu_cmd_op_t op,
hsa_queue_t* queue) {
packet_t* command;
const size_t size = GetGpuCommand(op, agent_info, &command);
hsa_status_t status = hsa_signal_create(1, 0, NULL, &(command->completion_signal));
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "signal_create " << std::hex << status);
rocprofiler::util::HsaRsrcFactory::Instance().Submit(queue, command, size);
rocprofiler::util::HsaRsrcFactory::Instance().SignalWait(command->completion_signal, 1);
status = hsa_signal_destroy(command->completion_signal);
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "signal_destroy " << std::hex << status);
return HSA_STATUS_SUCCESS;
}
@@ -369,11 +369,6 @@ class InterceptQueue {
packets.insert(packets.end(), *packet);
// Read at kernel end
packets.insert(packets.end(), mid, read_vector.end());
// Save the stop packets for eventual PmcStopper
if (Context::stop_packets_.empty()) {
Context::stop_packets_.insert(Context::stop_packets_.end(), stop_vector.begin(), stop_vector.end());
}
}
if (tracker_entry != NULL) {
+1 -1
View File
@@ -137,7 +137,7 @@ class Profile {
virtual void Insert(const profile_info_t& info) { info_vector_.push_back(info.rinfo); }
void SetConcurrent(profile_t* profile) {
static void SetConcurrent(profile_t* profile) {
// Check whether concurrent has been set
for (const parameter_t* p = profile->parameters;
p < (profile->parameters + profile->parameter_count); ++p) {
+8 -12
View File
@@ -247,26 +247,22 @@ void PmcStopper() {
// Create queue
hsa_queue_t* queue;
hsa_status_t status = rocprofiler::CreateQueuePro(agent_info->dev_id, 1,
HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "CreateQueuePro ("
<< gpu_id << ") " << std::hex << status);
const bool ret = rsrc->CreateQueue(agent_info, 10, &queue);
if (ret != true) EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue(" << gpu_id << ")");
// Submit packets
for (auto& pkt: Context::stop_packets_) {
rsrc->Submit(queue, &pkt);
// Wait for stop packet to complete
rsrc->SignalWaitRestore(pkt.completion_signal, 1);
}
// Issue PMC-disable GPU command
IssueGpuCommand(PMC_DISABLE_GPU_CMD_OP, agent_info, queue);
hsa_queue_destroy(queue);
rsrc->HsaApi()->hsa_queue_destroy(queue);
}
}
// Unload profiling tool library
void UnloadTool() {
ONLOAD_TRACE("tool handle(" << tool_handle << ")");
//if (Context::k_concurrent_) PmcStopper();
if (Context::k_concurrent_) PmcStopper();
if (tool_handle) {
tool_handler_t handler = reinterpret_cast<tool_handler_t>(dlsym(tool_handle, "OnUnloadTool"));
if (handler == NULL) {