several fixes; adding direct loading of aqlprofile library
[ROCm/rocprofiler commit: 8270530fec]
This commit is contained in:
@@ -55,12 +55,14 @@ class InterceptQueue {
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
InterceptQueue* obj = new InterceptQueue(agent, proxy);
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
}
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -97,7 +99,7 @@ class InterceptQueue {
|
||||
dispatch_packet->kernel_object,
|
||||
GetKernelName(dispatch_packet)};
|
||||
hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) {
|
||||
Context* context = reinterpret_cast<Context*>(group.context);
|
||||
const pkt_vector_t& start_vector = context->StartPackets(group.index);
|
||||
const pkt_vector_t& stop_vector = context->StopPackets(group.index);
|
||||
|
||||
@@ -110,13 +110,18 @@ class MetricsDict {
|
||||
MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) {
|
||||
const char* xml_name = getenv("ROCP_METRICS");
|
||||
if (xml_name != NULL) {
|
||||
xml_ = new xml::Xml(xml_name);
|
||||
xml_ = xml::Xml::Create(xml_name);
|
||||
if (xml_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metrics .xml open error '" << xml_name << "'");
|
||||
std::cout << "ROCProfiler: importing metrics from '" << xml_name << "':" << std::endl;
|
||||
ImportMetrics(agent_info, agent_info->gfxip);
|
||||
ImportMetrics(agent_info, "global");
|
||||
}
|
||||
}
|
||||
|
||||
// Passes the metrics XML object to xml::Xml::Destroy(); xml_ is still NULL
// when the constructor did not create one.
~MetricsDict() { xml::Xml::Destroy(xml_); }
|
||||
|
||||
void ImportMetrics(const util::AgentInfo* agent_info, const char* scope) {
|
||||
auto scope_list = xml_->GetNodes("top." + std::string(scope) + ".metric");
|
||||
if (!scope_list.empty()) {
|
||||
|
||||
@@ -24,7 +24,7 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3
|
||||
ProxyQueue* instance = new SimpleProxyQueue();
|
||||
#endif
|
||||
if (instance != NULL) {
|
||||
const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size,
|
||||
suc = instance->Init(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue);
|
||||
if (suc != HSA_STATUS_SUCCESS) {
|
||||
delete instance;
|
||||
@@ -32,10 +32,12 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3
|
||||
}
|
||||
}
|
||||
*status = suc;
|
||||
assert(*status == HSA_STATUS_SUCCESS);
|
||||
return instance;
|
||||
}
|
||||
|
||||
hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) {
|
||||
assert(obj != NULL);
|
||||
auto suc = obj->Cleanup();
|
||||
delete obj;
|
||||
return suc;
|
||||
|
||||
@@ -7,5 +7,5 @@ void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::StoreIndex;
|
||||
}
|
||||
|
||||
std::map<signal_handle_t, SimpleProxyQueue*> SimpleProxyQueue::queue_map_;
|
||||
SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL;
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -24,8 +24,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static void SignalStore(hsa_signal_t signal, hsa_signal_value_t que_idx) {
|
||||
auto it = queue_map_.find(signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
auto it = queue_map_->find(signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
const uint64_t begin = instance->submit_index_;
|
||||
const uint64_t end = que_idx + 1;
|
||||
@@ -46,8 +46,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
|
||||
static uint64_t LoadIndex(const hsa_queue_t* queue) {
|
||||
uint64_t index = 0;
|
||||
auto it = queue_map_.find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
auto it = queue_map_->find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->mutex_.lock();
|
||||
index = instance->queue_index_;
|
||||
@@ -58,8 +58,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
}
|
||||
|
||||
static void StoreIndex(const hsa_queue_t* queue, uint64_t value) {
|
||||
auto it = queue_map_.find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
auto it = queue_map_->find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->queue_index_ = value;
|
||||
instance->mutex_.unlock();
|
||||
@@ -115,6 +115,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
~SimpleProxyQueue() {}
|
||||
|
||||
private:
|
||||
typedef std::map<signal_handle_t, SimpleProxyQueue*> queue_map_t;
|
||||
|
||||
hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
@@ -129,6 +131,7 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
if (agent_info_ != NULL) {
|
||||
if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) {
|
||||
printf("queue_create size 0x%x(%d)\n", size, (int)size);
|
||||
status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, &queue_);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
@@ -138,11 +141,16 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
uintptr_t addr = (uintptr_t)data_array_;
|
||||
queue_->base_address = (void*)((addr + align_mask_) & ~align_mask_);
|
||||
status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal));
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
queue_mask_ = size - 1;
|
||||
queue_map_[queue_->doorbell_signal.handle] = this;
|
||||
|
||||
if (queue_map_ == NULL) queue_map_ = new queue_map_t;
|
||||
(*queue_map_)[queue_->doorbell_signal.handle] = this;
|
||||
}
|
||||
else abort();
|
||||
}
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -155,7 +163,7 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
return status;
|
||||
}
|
||||
|
||||
static std::map<signal_handle_t, SimpleProxyQueue*> queue_map_;
|
||||
static queue_map_t* queue_map_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
hsa_queue_t* queue_;
|
||||
static const uintptr_t align_mask_ = sizeof(packet_t) - 1;
|
||||
|
||||
@@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
@@ -85,7 +86,10 @@ HsaRsrcFactory::HsaRsrcFactory() {
|
||||
// Get AqlProfile API table
|
||||
aqlprofile_api_ = {0};
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
|
||||
CHECK_STATUS("aqlprofile API table query failed", status);
|
||||
#ifdef ROCP_LOAD_AQLPROF
|
||||
if (status != HSA_STATUS_SUCCESS) status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
@@ -99,6 +103,39 @@ HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
|
||||
// Load the AQL profile HSA extension library directly with dlopen() and fill
// 'api' with the entry points resolved from it.
//
// @param api  AqlProfile API table to populate. It is written only when every
//             entry point was resolved; on failure it is left untouched.
// @return HSA_STATUS_SUCCESS when the library was opened and all entry points
//         were found, HSA_STATUS_ERROR otherwise.
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
  void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
  if (handle == NULL) {
    fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
    return HSA_STATUS_ERROR;
  }

  // Resolve one entry point; a failed lookup is reported and remembered so the
  // caller never receives a table holding NULL function pointers.
  bool resolved = true;
  auto lookup = [&](const char* name) -> void* {
    dlerror(); /* Clear any existing error */
    void* addr = dlsym(handle, name);
    if (addr == NULL) {
      const char* reason = dlerror();
      fprintf(stderr, "Loading '%s' failed, symbol '%s', %s\n", kAqlProfileLib, name,
              (reason != NULL) ? reason : "NULL address");
      resolved = false;
    }
    return addr;
  };

  // Fill a scratch copy so that 'api' is never left partially updated.
  aqlprofile_pfn_t table = *api;
  table.hsa_ven_amd_aqlprofile_error_string =
      (decltype(::hsa_ven_amd_aqlprofile_error_string)*)
          lookup("hsa_ven_amd_aqlprofile_error_string");
  table.hsa_ven_amd_aqlprofile_validate_event =
      (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)
          lookup("hsa_ven_amd_aqlprofile_validate_event");
  table.hsa_ven_amd_aqlprofile_start =
      (decltype(::hsa_ven_amd_aqlprofile_start)*)
          lookup("hsa_ven_amd_aqlprofile_start");
  table.hsa_ven_amd_aqlprofile_stop =
      (decltype(::hsa_ven_amd_aqlprofile_stop)*)
          lookup("hsa_ven_amd_aqlprofile_stop");
  table.hsa_ven_amd_aqlprofile_legacy_get_pm4 =
      (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)
          lookup("hsa_ven_amd_aqlprofile_legacy_get_pm4");
  table.hsa_ven_amd_aqlprofile_get_info =
      (decltype(::hsa_ven_amd_aqlprofile_get_info)*)
          lookup("hsa_ven_amd_aqlprofile_get_info");
  table.hsa_ven_amd_aqlprofile_iterate_data =
      (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)
          lookup("hsa_ven_amd_aqlprofile_iterate_data");

  if (!resolved) {
    // Nothing from the library is handed out, so the handle can be released.
    dlclose(handle);
    return HSA_STATUS_ERROR;
  }

  // The handle is intentionally kept open: the table points into the library.
  *api = table;
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Add system agent info
|
||||
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
// Determine if device is a Gpu agent
|
||||
|
||||
@@ -222,7 +222,8 @@ class HsaRsrcFactory {
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Return AqlProfile API table
|
||||
const hsa_ven_amd_aqlprofile_1_00_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t;
|
||||
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
// Return Loader API table
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
@@ -234,6 +235,9 @@ class HsaRsrcFactory {
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
|
||||
|
||||
// Load AQL profile HSA extension library directly
|
||||
static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api);
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory();
|
||||
@@ -257,7 +261,7 @@ class HsaRsrcFactory {
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// AqlProfile API table
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_;
|
||||
aqlprofile_pfn_t aqlprofile_api_;
|
||||
|
||||
// Loader API table
|
||||
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
|
||||
|
||||
@@ -27,6 +27,33 @@ class Xml {
|
||||
|
||||
enum { DECL_STATE, BODY_STATE };
|
||||
|
||||
// Builds an Xml object for 'file_name'.
// Returns the new object, or NULL when the constructor left fd_ at -1
// (the file could not be opened); in that case the object is deleted here.
static Xml* Create(const char* file_name) {
  Xml* instance = new Xml(file_name);
  if (instance->fd_ != -1) return instance;

  delete instance;
  return NULL;
}
|
||||
|
||||
// Deletes the given Xml object; a NULL pointer is accepted and ignored.
static void Destroy(Xml* xml) {
  delete xml;
}
|
||||
|
||||
// Returns a copy of the node list stored under 'global_tag'.
// An unknown tag yields an empty list.
std::vector<level_t*> GetNodes(std::string global_tag) {
  // Use find() rather than operator[]: the subscript would insert an empty
  // entry into map_ for every tag that is queried but not present.
  auto found = map_.find(global_tag);
  if (found == map_.end()) return std::vector<level_t*>();
  return found->second;
}
|
||||
|
||||
void Print() const {
|
||||
for (auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Xml(const char* file_name)
|
||||
: file_name_(file_name),
|
||||
file_line_(0),
|
||||
@@ -39,7 +66,7 @@ class Xml {
|
||||
|
||||
fd_ = open(file_name, O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
std::cout << "XML file not found: " << file_name << std::endl;
|
||||
perror("open XML file");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -117,22 +144,8 @@ class Xml {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<level_t*> GetNodes(std::string global_tag) { return map_[global_tag]; }
|
||||
~Xml() {}
|
||||
|
||||
void Print() const {
|
||||
for (auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
|
||||
@@ -61,6 +61,7 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
|
||||
uint32_t num_pkts = 128;
|
||||
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
|
||||
hsa_queue_ = NULL;
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
#define TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_pgen.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
|
||||
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Class implements PMC profiling
|
||||
class TestPGenRocp : public TestPGen {
|
||||
public:
|
||||
explicit TestPGenRocp(TestAql* t) : TestPGen(t) { std::clog << "Test: PGen ROCP" << std::endl; }
|
||||
|
||||
bool Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
status = rocprofiler_on_dispatch(&profile_, PrePacket(), PostPacket());
|
||||
TEST_STATUS(status != HSA_STATUS_SUCCESS);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
private:
|
||||
bool BuildPackets() { return true; }
|
||||
|
||||
bool DumpData() {
|
||||
std::clog << "TestPGenRocp::DumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
api_.hsa_ven_amd_aqlprofile_iterate_data(&profile_, TestPGenRocpCallback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << std::dec << "event(block(" << it->pmc_data.event.block_name << "_"
|
||||
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
|
||||
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
@@ -62,7 +62,7 @@ bool TestPMgr::AddPacketGfx8(const packet_t* packet) {
|
||||
// Create legacy devices PM4 data
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
|
||||
slot_pm4_t data;
|
||||
api_.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
|
||||
api_->hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue());
|
||||
@@ -128,16 +128,14 @@ bool TestPMgr::Initialize(int argc, char** argv) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
status = hsa_signal_create(1, 0, NULL, &post_signal_);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api_);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
api_ = HsaRsrcFactory::Instance().AqlProfileApi();;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_({0}) {
|
||||
TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_(NULL) {
|
||||
memset(&pre_packet_, 0, sizeof(pre_packet_));
|
||||
memset(&post_packet_, 0, sizeof(post_packet_));
|
||||
dummy_signal_.handle = 0;
|
||||
post_signal_ = dummy_signal_;
|
||||
memset(&api_, 0, sizeof(api_));
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ class TestPMgr : public TestAql {
|
||||
hsa_signal_t dummy_signal_;
|
||||
hsa_signal_t post_signal_;
|
||||
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t api_;
|
||||
HsaRsrcFactory::aqlprofile_pfn_t* api_;
|
||||
|
||||
virtual bool BuildPackets() { return false; }
|
||||
virtual bool DumpData() { return false; }
|
||||
|
||||
@@ -320,7 +320,11 @@ CONSTRUCTOR_API void constructor() {
|
||||
exit(1);
|
||||
}
|
||||
printf("ROCProfiler: input from \"%s\"\n", xml_name);
|
||||
xml::Xml* xml = new xml::Xml(xml_name);
|
||||
xml::Xml* xml = xml::Xml::Create(xml_name);
|
||||
if (xml == NULL) {
|
||||
fprintf(stderr, "Input file not found '%s'\n", xml_name);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Getting metrics
|
||||
auto metrics_list = xml->GetNodes("top.metric");
|
||||
|
||||
@@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
@@ -116,6 +117,12 @@ HsaRsrcFactory::HsaRsrcFactory() {
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
|
||||
// Get AqlProfile API table
|
||||
aqlprofile_api_ = {0};
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
|
||||
if (status != HSA_STATUS_SUCCESS) status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
@@ -124,6 +131,39 @@ HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
|
||||
// Load the AQL profile HSA extension library directly with dlopen() and fill
// 'api' with the entry points resolved from it.
//
// @param api  AqlProfile API table to populate. It is written only when every
//             entry point was resolved; on failure it is left untouched.
// @return HSA_STATUS_SUCCESS when the library was opened and all entry points
//         were found, HSA_STATUS_ERROR otherwise.
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
  void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
  if (handle == NULL) {
    fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
    return HSA_STATUS_ERROR;
  }

  // Resolve one entry point; a failed lookup is reported and remembered so the
  // caller never receives a table holding NULL function pointers.
  bool resolved = true;
  auto lookup = [&](const char* name) -> void* {
    dlerror(); /* Clear any existing error */
    void* addr = dlsym(handle, name);
    if (addr == NULL) {
      const char* reason = dlerror();
      fprintf(stderr, "Loading '%s' failed, symbol '%s', %s\n", kAqlProfileLib, name,
              (reason != NULL) ? reason : "NULL address");
      resolved = false;
    }
    return addr;
  };

  // Fill a scratch copy so that 'api' is never left partially updated.
  aqlprofile_pfn_t table = *api;
  table.hsa_ven_amd_aqlprofile_error_string =
      (decltype(::hsa_ven_amd_aqlprofile_error_string)*)
          lookup("hsa_ven_amd_aqlprofile_error_string");
  table.hsa_ven_amd_aqlprofile_validate_event =
      (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)
          lookup("hsa_ven_amd_aqlprofile_validate_event");
  table.hsa_ven_amd_aqlprofile_start =
      (decltype(::hsa_ven_amd_aqlprofile_start)*)
          lookup("hsa_ven_amd_aqlprofile_start");
  table.hsa_ven_amd_aqlprofile_stop =
      (decltype(::hsa_ven_amd_aqlprofile_stop)*)
          lookup("hsa_ven_amd_aqlprofile_stop");
  table.hsa_ven_amd_aqlprofile_legacy_get_pm4 =
      (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)
          lookup("hsa_ven_amd_aqlprofile_legacy_get_pm4");
  table.hsa_ven_amd_aqlprofile_get_info =
      (decltype(::hsa_ven_amd_aqlprofile_get_info)*)
          lookup("hsa_ven_amd_aqlprofile_get_info");
  table.hsa_ven_amd_aqlprofile_iterate_data =
      (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)
          lookup("hsa_ven_amd_aqlprofile_iterate_data");

  if (!resolved) {
    // Nothing from the library is handed out, so the handle can be released.
    dlclose(handle);
    return HSA_STATUS_ERROR;
  }

  // The handle is intentionally kept open: the table points into the library.
  *api = table;
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
|
||||
@@ -27,6 +27,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -54,7 +55,7 @@ static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
typedef struct {
|
||||
struct AgentInfo {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
@@ -78,8 +79,7 @@ typedef struct {
|
||||
|
||||
// Memory region supporting kernel arguments
|
||||
hsa_region_t kernarg_region;
|
||||
|
||||
} AgentInfo;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
@@ -207,7 +207,14 @@ class HsaRsrcFactory {
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Return AqlProfile API table
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t;
|
||||
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
private:
|
||||
// Load AQL profile HSA extension library directly
|
||||
static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api);
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory();
|
||||
@@ -229,6 +236,9 @@ class HsaRsrcFactory {
|
||||
|
||||
// Used to maintain a list of Hsa Cpu Agent Info
|
||||
std::vector<AgentInfo*> cpu_list_;
|
||||
|
||||
// AqlProfile API table
|
||||
aqlprofile_pfn_t aqlprofile_api_;
|
||||
};
|
||||
|
||||
#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
@@ -27,6 +27,33 @@ class Xml {
|
||||
|
||||
enum { DECL_STATE, BODY_STATE };
|
||||
|
||||
// Builds an Xml object for 'file_name'.
// Returns the new object, or NULL when the constructor left fd_ at -1
// (the file could not be opened); in that case the object is deleted here.
static Xml* Create(const char* file_name) {
  Xml* instance = new Xml(file_name);
  if (instance->fd_ != -1) return instance;

  delete instance;
  return NULL;
}
|
||||
|
||||
// Deletes the given Xml object; a NULL pointer is accepted and ignored.
static void Destroy(Xml* xml) {
  delete xml;
}
|
||||
|
||||
// Returns a copy of the node list stored under 'global_tag'.
// An unknown tag yields an empty list.
std::vector<level_t*> GetNodes(std::string global_tag) {
  // Use find() rather than operator[]: the subscript would insert an empty
  // entry into map_ for every tag that is queried but not present.
  auto found = map_.find(global_tag);
  if (found == map_.end()) return std::vector<level_t*>();
  return found->second;
}
|
||||
|
||||
void Print() const {
|
||||
for (auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Xml(const char* file_name)
|
||||
: file_name_(file_name),
|
||||
file_line_(0),
|
||||
@@ -39,7 +66,7 @@ class Xml {
|
||||
|
||||
fd_ = open(file_name, O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
std::cout << "XML file not found: " << file_name << std::endl;
|
||||
perror("open XML file");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -117,22 +144,8 @@ class Xml {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<level_t*> GetNodes(std::string global_tag) { return map_[global_tag]; }
|
||||
~Xml() {}
|
||||
|
||||
void Print() const {
|
||||
for (auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
|
||||
Verwijs in nieuw issue
Block a user