[ROCm/rocprofiler commit: ff1e5b67a7]
Этот коммит содержится в:
Evgeny
2018-02-02 15:38:28 -06:00
родитель fa134bbf4d
Коммит 4b7e5f51da
5 изменённых файлов: 55 добавлений и 7 удалений
+12 -2
Просмотреть файл
@@ -94,9 +94,9 @@ class Group {
}
hsa_status_t Finalize() {
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_);
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_, read_vector_);
if (status == HSA_STATUS_SUCCESS) {
status = sqtt_profile_.Finalize(start_vector_, stop_vector_);
status = sqtt_profile_.Finalize(start_vector_, stop_vector_, read_vector_);
}
if (status == HSA_STATUS_SUCCESS) {
if (!pmc_profile_.Empty()) ++n_profiles_;
@@ -115,6 +115,7 @@ class Group {
// Mutable access to the group's info vector.
info_vector_t& GetInfoVector() { return info_vector_; }
// Read-only access to the group's start packet vector.
const pkt_vector_t& GetStartVector() const { return start_vector_; }
// Read-only access to the group's stop packet vector.
const pkt_vector_t& GetStopVector() const { return stop_vector_; }
// Read-only access to the group's read packet vector.
const pkt_vector_t& GetReadVector() const { return read_vector_; }
// Context this group is bound to.
Context* GetContext() { return context_; }
// Index stored for this group.
uint32_t GetIndex() const { return index_; }
@@ -133,6 +134,7 @@ class Group {
info_vector_t info_vector_;
pkt_vector_t start_vector_;
pkt_vector_t stop_vector_;
pkt_vector_t read_vector_;
uint32_t n_profiles_;
uint32_t refs_;
Context* const context_;
@@ -286,6 +288,9 @@ class Context {
// Returns the stop packet vector of the group selected by 'group_index'.
const pkt_vector_t& StopPackets(const uint32_t& group_index) const {
  const auto& group = set_[group_index];
  return group.GetStopVector();
}
// Returns the read packet vector of the group selected by 'group_index'.
const pkt_vector_t& ReadPackets(const uint32_t& group_index) const {
  const auto& group = set_[group_index];
  return group.GetReadVector();
}
void Start(const uint32_t& group_index, Queue* const queue = NULL) {
const pkt_vector_t& start_packets = StartPackets(group_index);
@@ -297,6 +302,11 @@ class Context {
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
submit_queue->Submit(&stop_packets[0], stop_packets.size());
}
// Submits the read packets of the group selected by 'group_index'.
//   group_index - index of the group whose read packets are submitted
//   queue       - queue to submit to; when NULL the context's queue_ is used
void Read(const uint32_t& group_index, Queue* const queue = NULL) {
  // Must fetch the read packets; fetching the stop packets here would make
  // a read request behave like a second Stop().
  const pkt_vector_t& read_packets = ReadPackets(group_index);
  Queue* const submit_queue = (queue != NULL) ? queue : queue_;
  // data() is well defined on an empty vector, whereas &v[0] is not.
  submit_queue->Submit(read_packets.data(), read_packets.size());
}
void Submit(const uint32_t& group_index, const packet_t* packet, Queue* const queue = NULL) {
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
Start(group_index, submit_queue);
+24 -1
Просмотреть файл
@@ -10,6 +10,10 @@
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
// Compile-time switch for the aqlprofile read API. Defaults to 0 (disabled)
// unless the build defines it beforehand. When non-zero, the code guarded by
// it calls hsa_ven_amd_aqlprofile_read to build the read packet.
#ifndef AQL_PROFILE_READ_API_ENABLE
#define AQL_PROFILE_READ_API_ENABLE 0
#endif
namespace rocprofiler {
struct profile_info_t {
const event_t* event;
@@ -96,7 +100,7 @@ class Profile {
virtual void Insert(const profile_info_t& info) { info_vector_.push_back(info.rinfo); }
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) {
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector, pkt_vector_t& read_vector) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (!info_vector_.empty()) {
@@ -104,6 +108,7 @@ class Profile {
const pfn_t* api = rsrc->AqlProfileApi();
packet_t start{};
packet_t stop{};
packet_t read{};
// Check the profile buffer sizes
status = api->hsa_ven_amd_aqlprofile_start(&profile_, NULL);
@@ -114,6 +119,12 @@ class Profile {
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start");
status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop");
#if AQL_PROFILE_READ_API_ENABLE
status = api->hsa_ven_amd_aqlprofile_read(&profile_, &read);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_read");
#endif // AQL_PROFILE_READ_API_ENABLE
// Set completion signals
hsa_signal_t dummy_signal{};
dummy_signal.handle = 0;
@@ -122,6 +133,7 @@ class Profile {
status = hsa_signal_create(1, 0, NULL, &post_signal);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "signal_create " << std::hex << status);
stop.completion_signal = post_signal;
read.completion_signal = post_signal;
completion_signal_ = post_signal;
if (is_legacy_) {
@@ -130,6 +142,7 @@ class Profile {
start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(
&start, reinterpret_cast<void*>(&start_vector[start_index]));
if (status != HSA_STATUS_SUCCESS)
@@ -138,9 +151,19 @@ class Profile {
&stop, reinterpret_cast<void*>(&stop_vector[stop_index]));
if (status != HSA_STATUS_SUCCESS)
AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
#if AQL_PROFILE_READ_API_ENABLE
const uint32_t read_index = read_vector.size();
read_vector.insert(read_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(
&read, reinterpret_cast<void*>(&read_vector[read_index]));
if (status != HSA_STATUS_SUCCESS)
AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
#endif // AQL_PROFILE_READ_API_ENABLE
} else {
start_vector.push_back(start);
stop_vector.push_back(stop);
read_vector.push_back(read);
}
}
+15
Просмотреть файл
@@ -241,6 +241,14 @@ PUBLIC_API hsa_status_t rocprofiler_stop(rocprofiler_t* handle, uint32_t group_i
API_METHOD_SUFFIX
}
// Read profiling: forwards the request to Context::Read() for the given group.
//   handle      - profiling handle, interpreted as a rocprofiler::Context
//   group_index - index of the group to read
PUBLIC_API hsa_status_t rocprofiler_read(rocprofiler_t* handle, uint32_t group_index) {
  API_METHOD_PREFIX
  reinterpret_cast<rocprofiler::Context*>(handle)->Read(group_index);
  API_METHOD_SUFFIX
}
// Get profiling data
PUBLIC_API hsa_status_t rocprofiler_get_data(rocprofiler_t* handle, uint32_t group_index) {
API_METHOD_PREFIX
@@ -263,6 +271,13 @@ PUBLIC_API hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group) {
API_METHOD_SUFFIX
}
// Read profiling for a group: forwards to rocprofiler_read() using the
// context and index stored in 'group'.
PUBLIC_API hsa_status_t rocprofiler_group_read(rocprofiler_group_t* group) {
API_METHOD_PREFIX
// NOTE(review): the hsa_status_t returned by rocprofiler_read() is discarded
// here, so a failure of the inner call may not reach the caller — confirm
// against the API_METHOD_PREFIX/API_METHOD_SUFFIX definitions.
rocprofiler_read(group->context, group->index);
API_METHOD_SUFFIX
}
// Get profiling data
PUBLIC_API hsa_status_t rocprofiler_group_get_data(rocprofiler_group_t* group) {
API_METHOD_PREFIX
+2 -2
Просмотреть файл
@@ -107,8 +107,8 @@ HsaRsrcFactory::HsaRsrcFactory() {
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
for (auto p : cpu_list_) free(const_cast<AgentInfo*>(p));
for (auto p : gpu_list_) free(const_cast<AgentInfo*>(p));
for (auto p : cpu_list_) delete p;
for (auto p : gpu_list_) delete p;
printf("HSA shutdown\n");
hsa_status_t status = hsa_shut_down();
+2 -2
Просмотреть файл
@@ -104,8 +104,8 @@ HsaRsrcFactory::HsaRsrcFactory() {
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
for (auto p : cpu_list_) free(const_cast<AgentInfo*>(p));
for (auto p : gpu_list_) free(const_cast<AgentInfo*>(p));
for (auto p : cpu_list_) delete p;
for (auto p : gpu_list_) delete p;
printf("HSA shutdown\n");
hsa_status_t status = hsa_shut_down();