@@ -94,9 +94,9 @@ class Group {
|
||||
}
|
||||
|
||||
hsa_status_t Finalize() {
|
||||
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_);
|
||||
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_, read_vector_);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = sqtt_profile_.Finalize(start_vector_, stop_vector_);
|
||||
status = sqtt_profile_.Finalize(start_vector_, stop_vector_, read_vector_);
|
||||
}
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
if (!pmc_profile_.Empty()) ++n_profiles_;
|
||||
@@ -115,6 +115,7 @@ class Group {
|
||||
info_vector_t& GetInfoVector() { return info_vector_; }
|
||||
const pkt_vector_t& GetStartVector() const { return start_vector_; }
|
||||
const pkt_vector_t& GetStopVector() const { return stop_vector_; }
|
||||
const pkt_vector_t& GetReadVector() const { return read_vector_; }
|
||||
Context* GetContext() { return context_; }
|
||||
uint32_t GetIndex() const { return index_; }
|
||||
|
||||
@@ -133,6 +134,7 @@ class Group {
|
||||
info_vector_t info_vector_;
|
||||
pkt_vector_t start_vector_;
|
||||
pkt_vector_t stop_vector_;
|
||||
pkt_vector_t read_vector_;
|
||||
uint32_t n_profiles_;
|
||||
uint32_t refs_;
|
||||
Context* const context_;
|
||||
@@ -286,6 +288,9 @@ class Context {
|
||||
// Returns, by const reference, the stop packet vector of the group stored at
// `group_index` in set_.
const pkt_vector_t& StopPackets(const uint32_t& group_index) const {
  const auto& selected_group = set_[group_index];
  return selected_group.GetStopVector();
}
|
||||
// Returns, by const reference, the read packet vector of the group stored at
// `group_index` in set_.
const pkt_vector_t& ReadPackets(const uint32_t& group_index) const {
  const auto& selected_group = set_[group_index];
  return selected_group.GetReadVector();
}
|
||||
|
||||
void Start(const uint32_t& group_index, Queue* const queue = NULL) {
|
||||
const pkt_vector_t& start_packets = StartPackets(group_index);
|
||||
@@ -297,6 +302,11 @@ class Context {
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
submit_queue->Submit(&stop_packets[0], stop_packets.size());
|
||||
}
|
||||
void Read(const uint32_t& group_index, Queue* const queue = NULL) {
|
||||
const pkt_vector_t& read_packets = StopPackets(group_index);
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
submit_queue->Submit(&read_packets[0], read_packets.size());
|
||||
}
|
||||
void Submit(const uint32_t& group_index, const packet_t* packet, Queue* const queue = NULL) {
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
Start(group_index, submit_queue);
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
// Compile-time switch tested by the `#if AQL_PROFILE_READ_API_ENABLE` sections
// in this file. Defaults to 0 unless the macro is already defined.
#if !defined(AQL_PROFILE_READ_API_ENABLE)
#define AQL_PROFILE_READ_API_ENABLE 0
#endif
|
||||
|
||||
namespace rocprofiler {
|
||||
struct profile_info_t {
|
||||
const event_t* event;
|
||||
@@ -96,7 +100,7 @@ class Profile {
|
||||
|
||||
virtual void Insert(const profile_info_t& info) { info_vector_.push_back(info.rinfo); }
|
||||
|
||||
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) {
|
||||
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector, pkt_vector_t& read_vector) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
if (!info_vector_.empty()) {
|
||||
@@ -104,6 +108,7 @@ class Profile {
|
||||
const pfn_t* api = rsrc->AqlProfileApi();
|
||||
packet_t start{};
|
||||
packet_t stop{};
|
||||
packet_t read{};
|
||||
|
||||
// Check the profile buffer sizes
|
||||
status = api->hsa_ven_amd_aqlprofile_start(&profile_, NULL);
|
||||
@@ -114,6 +119,12 @@ class Profile {
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start");
|
||||
status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop");
|
||||
|
||||
#if AQL_PROFILE_READ_API_ENABLE
|
||||
status = api->hsa_ven_amd_aqlprofile_read(&profile_, &read);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_read");
|
||||
#endif // AQL_PROFILE_READ_API_ENABLE
|
||||
|
||||
// Set completion signals
|
||||
hsa_signal_t dummy_signal{};
|
||||
dummy_signal.handle = 0;
|
||||
@@ -122,6 +133,7 @@ class Profile {
|
||||
status = hsa_signal_create(1, 0, NULL, &post_signal);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "signal_create " << std::hex << status);
|
||||
stop.completion_signal = post_signal;
|
||||
read.completion_signal = post_signal;
|
||||
completion_signal_ = post_signal;
|
||||
|
||||
if (is_legacy_) {
|
||||
@@ -130,6 +142,7 @@ class Profile {
|
||||
|
||||
start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
|
||||
stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
|
||||
|
||||
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(
|
||||
&start, reinterpret_cast<void*>(&start_vector[start_index]));
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
@@ -138,9 +151,19 @@ class Profile {
|
||||
&stop, reinterpret_cast<void*>(&stop_vector[stop_index]));
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
|
||||
#if AQL_PROFILE_READ_API_ENABLE
|
||||
const uint32_t read_index = read_vector.size();
|
||||
read_vector.insert(read_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
|
||||
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(
|
||||
&read, reinterpret_cast<void*>(&read_vector[read_index]));
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
#endif // AQL_PROFILE_READ_API_ENABLE
|
||||
} else {
|
||||
start_vector.push_back(start);
|
||||
stop_vector.push_back(stop);
|
||||
read_vector.push_back(read);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -241,6 +241,14 @@ PUBLIC_API hsa_status_t rocprofiler_stop(rocprofiler_t* handle, uint32_t group_i
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Read profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_read(rocprofiler_t* handle, uint32_t group_index) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->Read(group_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling data
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_data(rocprofiler_t* handle, uint32_t group_index) {
|
||||
API_METHOD_PREFIX
|
||||
@@ -263,6 +271,13 @@ PUBLIC_API hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group) {
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Read profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_group_read(rocprofiler_group_t* group) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler_read(group->context, group->index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling data
|
||||
PUBLIC_API hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group) {
|
||||
API_METHOD_PREFIX
|
||||
|
||||
@@ -107,8 +107,8 @@ HsaRsrcFactory::HsaRsrcFactory() {
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
for (auto p : cpu_list_) free(const_cast<AgentInfo*>(p));
|
||||
for (auto p : gpu_list_) free(const_cast<AgentInfo*>(p));
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
|
||||
printf("HSA shutdown\n");
|
||||
hsa_status_t status = hsa_shut_down();
|
||||
|
||||
@@ -104,8 +104,8 @@ HsaRsrcFactory::HsaRsrcFactory() {
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
for (auto p : cpu_list_) free(const_cast<AgentInfo*>(p));
|
||||
for (auto p : gpu_list_) free(const_cast<AgentInfo*>(p));
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
|
||||
printf("HSA shutdown\n");
|
||||
hsa_status_t status = hsa_shut_down();
|
||||
|
||||
Ссылка в новой задаче
Block a user