From 24695e37be1e22c09166853acb165592b7054bdb Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 19 Dec 2017 15:32:34 -0600 Subject: [PATCH] several fixes; adding direct loading of alprofile library [ROCm/rocprofiler commit: 8270530fec4f8331c305b80bdaad29438a6bd993] --- .../rocprofiler/src/core/intercept_queue.h | 6 +- projects/rocprofiler/src/core/metrics.h | 7 +- projects/rocprofiler/src/core/proxy_queue.cpp | 4 +- .../src/core/simple_proxy_queue.cpp | 2 +- .../rocprofiler/src/core/simple_proxy_queue.h | 24 ++++-- .../rocprofiler/src/util/hsa_rsrc_factory.cpp | 39 +++++++++- .../rocprofiler/src/util/hsa_rsrc_factory.h | 8 +- projects/rocprofiler/src/xml/xml.h | 45 +++++++---- projects/rocprofiler/test/ctrl/test_hsa.cpp | 1 + .../rocprofiler/test/ctrl/test_pgen_rocp.h | 78 ------------------- projects/rocprofiler/test/ctrl/test_pmgr.cpp | 8 +- projects/rocprofiler/test/ctrl/test_pmgr.h | 2 +- projects/rocprofiler/test/ctrl/tool.cpp | 6 +- .../test/util/hsa_rsrc_factory.cpp | 40 ++++++++++ .../rocprofiler/test/util/hsa_rsrc_factory.h | 16 +++- projects/rocprofiler/test/util/xml.h | 45 +++++++---- 16 files changed, 195 insertions(+), 136 deletions(-) delete mode 100644 projects/rocprofiler/test/ctrl/test_pgen_rocp.h diff --git a/projects/rocprofiler/src/core/intercept_queue.h b/projects/rocprofiler/src/core/intercept_queue.h index f18d643194..bb50a4c238 100644 --- a/projects/rocprofiler/src/core/intercept_queue.h +++ b/projects/rocprofiler/src/core/intercept_queue.h @@ -55,12 +55,14 @@ class InterceptQueue { ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); - if (status != HSA_STATUS_SUCCESS) { + if (status == HSA_STATUS_SUCCESS) { InterceptQueue* obj = new InterceptQueue(agent, proxy); (*obj_map_)[(uint64_t)(*queue)] = obj; status = proxy->SetInterceptCB(OnSubmitCB, obj); } + if (status != HSA_STATUS_SUCCESS) abort(); + return status; } @@ -97,7 +99,7 @@ class InterceptQueue { dispatch_packet->kernel_object, GetKernelName(dispatch_packet)}; hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group); - if (status == HSA_STATUS_SUCCESS) { + if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) { Context* context = reinterpret_cast(group.context); const pkt_vector_t& start_vector = context->StartPackets(group.index); const pkt_vector_t& stop_vector = context->StopPackets(group.index); diff --git a/projects/rocprofiler/src/core/metrics.h b/projects/rocprofiler/src/core/metrics.h index 5590063e51..415dae86ee 100644 --- a/projects/rocprofiler/src/core/metrics.h +++ b/projects/rocprofiler/src/core/metrics.h @@ -110,13 +110,18 @@ class MetricsDict { MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) { const char* xml_name = getenv("ROCP_METRICS"); if (xml_name != NULL) { - xml_ = new xml::Xml(xml_name); + xml_ = xml::Xml::Create(xml_name); + if (xml_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metrics .xml open error '" << xml_name << "'"); std::cout << "ROCProfiler: importing metrics from '" << xml_name << "':" << std::endl; ImportMetrics(agent_info, agent_info->gfxip); ImportMetrics(agent_info, "global"); } } + ~MetricsDict() { + xml::Xml::Destroy(xml_); + } + void ImportMetrics(const util::AgentInfo* agent_info, const char* scope) { auto scope_list = xml_->GetNodes("top." + std::string(scope) + ".metric"); if (!scope_list.empty()) { diff --git a/projects/rocprofiler/src/core/proxy_queue.cpp b/projects/rocprofiler/src/core/proxy_queue.cpp index 5f76980d51..54c93801c8 100644 --- a/projects/rocprofiler/src/core/proxy_queue.cpp +++ b/projects/rocprofiler/src/core/proxy_queue.cpp @@ -24,7 +24,7 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3 ProxyQueue* instance = new SimpleProxyQueue(); #endif if (instance != NULL) { - const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, + suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue); if (suc != HSA_STATUS_SUCCESS) { delete instance; @@ -32,10 +32,12 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3 } } *status = suc; + assert(*status == HSA_STATUS_SUCCESS); return instance; } hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) { + assert(obj != NULL); auto suc = obj->Cleanup(); delete obj; return suc; diff --git a/projects/rocprofiler/src/core/simple_proxy_queue.cpp b/projects/rocprofiler/src/core/simple_proxy_queue.cpp index e138a042f6..cfb25f2c4e 100644 --- a/projects/rocprofiler/src/core/simple_proxy_queue.cpp +++ b/projects/rocprofiler/src/core/simple_proxy_queue.cpp @@ -7,5 +7,5 @@ void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) { table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::StoreIndex; } -std::map SimpleProxyQueue::queue_map_; +SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL; } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/simple_proxy_queue.h b/projects/rocprofiler/src/core/simple_proxy_queue.h index bdfbcf6cb7..9739c5e5d1 100644 --- a/projects/rocprofiler/src/core/simple_proxy_queue.h +++ b/projects/rocprofiler/src/core/simple_proxy_queue.h @@ -24,8 +24,8 @@ class SimpleProxyQueue : public ProxyQueue { static void HsaIntercept(HsaApiTable* table); static void SignalStore(hsa_signal_t signal, hsa_signal_value_t que_idx) { - auto it = queue_map_.find(signal.handle); - if (it != queue_map_.end()) { + auto it = queue_map_->find(signal.handle); + if (it != queue_map_->end()) { SimpleProxyQueue* instance = it->second; const uint64_t begin = instance->submit_index_; const uint64_t end = que_idx + 1; @@ -46,8 +46,8 @@ class SimpleProxyQueue : public ProxyQueue { static uint64_t LoadIndex(const hsa_queue_t* queue) { uint64_t index = 0; - auto it = queue_map_.find(queue->doorbell_signal.handle); - if (it != queue_map_.end()) { + auto it = queue_map_->find(queue->doorbell_signal.handle); + if (it != queue_map_->end()) { SimpleProxyQueue* instance = it->second; instance->mutex_.lock(); index = instance->queue_index_; @@ -58,8 +58,8 @@ class SimpleProxyQueue : public ProxyQueue { } static void StoreIndex(const hsa_queue_t* queue, uint64_t value) { - auto it = queue_map_.find(queue->doorbell_signal.handle); - if (it != queue_map_.end()) { + auto it = queue_map_->find(queue->doorbell_signal.handle); + if (it != queue_map_->end()) { SimpleProxyQueue* instance = it->second; instance->queue_index_ = value; instance->mutex_.unlock(); @@ -115,6 +115,8 @@ class SimpleProxyQueue : public ProxyQueue { ~SimpleProxyQueue() {} private: + typedef std::map queue_map_t; + hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data, uint32_t private_segment_size, uint32_t group_segment_size, @@ -129,6 +131,7 @@ class SimpleProxyQueue : public ProxyQueue { agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); if (agent_info_ != NULL) { if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) { + printf("queue_create size 0x%x(%d)\n", size, (int)size); status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue_); if (status == HSA_STATUS_SUCCESS) { @@ -138,11 +141,16 @@ class SimpleProxyQueue : public ProxyQueue { uintptr_t addr = (uintptr_t)data_array_; queue_->base_address = (void*)((addr + align_mask_) & ~align_mask_); status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal)); + if (status != HSA_STATUS_SUCCESS) abort(); queue_mask_ = size - 1; - queue_map_[queue_->doorbell_signal.handle] = this; + + if (queue_map_ == NULL) queue_map_ = new queue_map_t; + (*queue_map_)[queue_->doorbell_signal.handle] = this; } + else abort(); } } + if (status != HSA_STATUS_SUCCESS) abort(); return status; } @@ -155,7 +163,7 @@ class SimpleProxyQueue : public ProxyQueue { return status; } - static std::map queue_map_; + static queue_map_t* queue_map_; const util::AgentInfo* agent_info_; hsa_queue_t* queue_; static const uintptr_t align_mask_ = sizeof(packet_t) - 1; diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp index 62573aeaa8..a0e13accac 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp @@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "util/hsa_rsrc_factory.h" +#include #include #include #include @@ -85,7 +86,10 @@ HsaRsrcFactory::HsaRsrcFactory() { // Get AqlProfile API table aqlprofile_api_ = {0}; status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); - CHECK_STATUS("aqlprofile API table query failed", status); +#ifdef ROCP_LOAD_AQLPROF + if (status != HSA_STATUS_SUCCESS) status = LoadAqlProfileLib(&aqlprofile_api_); +#endif + CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; @@ -99,6 +103,39 @@ HsaRsrcFactory::~HsaRsrcFactory() { CHECK_STATUS("Error in hsa_shut_down", status); } +hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ + + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = + (decltype(::hsa_ven_amd_aqlprofile_get_info)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data"); + + return HSA_STATUS_SUCCESS; +} + // Add system agent info const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { // Determine if device is a Gpu agent diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.h b/projects/rocprofiler/src/util/hsa_rsrc_factory.h index 40a8f887a7..105483b7d0 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.h @@ -222,7 +222,8 @@ class HsaRsrcFactory { bool PrintGpuAgents(const std::string& header); // Return AqlProfile API table - const hsa_ven_amd_aqlprofile_1_00_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } + typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; + const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } // Return Loader API table const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } @@ -234,6 +235,9 @@ class HsaRsrcFactory { // Callback function to find and bind kernarg region of an agent static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data); + // Load AQL profile HSA extension library directly + static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api); + // Constructor of the class. Will initialize the Hsa Runtime and // query the system topology to get the list of Cpu and Gpu devices HsaRsrcFactory(); @@ -257,7 +261,7 @@ class HsaRsrcFactory { std::map agent_map_; // AqlProfile API table - hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_; + aqlprofile_pfn_t aqlprofile_api_; // Loader API table hsa_ven_amd_loader_1_00_pfn_t loader_api_; diff --git a/projects/rocprofiler/src/xml/xml.h b/projects/rocprofiler/src/xml/xml.h index c790ea618b..d6da6f779b 100644 --- a/projects/rocprofiler/src/xml/xml.h +++ b/projects/rocprofiler/src/xml/xml.h @@ -27,6 +27,33 @@ class Xml { enum { DECL_STATE, BODY_STATE }; + static Xml* Create(const char* file_name) { + Xml* xml = new Xml(file_name); + if (xml->fd_ == -1) { + delete xml; + xml = NULL; + } + return xml; + } + + static void Destroy(Xml *xml) { delete xml; } + + std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } + + void Print() const { + for (auto& elem : map_) { + for (auto node : elem.second) { + if (node->opts.size()) { + std::cout << elem.first << ":" << std::endl; + for (auto& opt : node->opts) { + std::cout << " " << opt.first << " = " << opt.second << std::endl; + } + } + } + } + } + + private: Xml(const char* file_name) : file_name_(file_name), file_line_(0), @@ -39,7 +66,7 @@ class Xml { fd_ = open(file_name, O_RDONLY); if (fd_ == -1) { - std::cout << "XML file not found: " << file_name << std::endl; + perror("open XML file"); return; } @@ -117,22 +144,8 @@ class Xml { } } - std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } + ~Xml() {} - void Print() const { - for (auto& elem : map_) { - for (auto node : elem.second) { - if (node->opts.size()) { - std::cout << elem.first << ":" << std::endl; - for (auto& opt : node->opts) { - std::cout << " " << opt.first << " = " << opt.second << std::endl; - } - } - } - } - } - - private: bool LineEndCheck() { bool found = false; if (buffer_[index_] == '\n') { diff --git a/projects/rocprofiler/test/ctrl/test_hsa.cpp b/projects/rocprofiler/test/ctrl/test_hsa.cpp index 30aeb9e942..cec8842f2d 100644 --- a/projects/rocprofiler/test/ctrl/test_hsa.cpp +++ b/projects/rocprofiler/test/ctrl/test_hsa.cpp @@ -61,6 +61,7 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) { uint32_t num_pkts = 128; if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { hsa_queue_ = NULL; + TEST_ASSERT(false); } } } diff --git a/projects/rocprofiler/test/ctrl/test_pgen_rocp.h b/projects/rocprofiler/test/ctrl/test_pgen_rocp.h deleted file mode 100644 index bdba66ecbf..0000000000 --- a/projects/rocprofiler/test/ctrl/test_pgen_rocp.h +++ /dev/null @@ -1,78 +0,0 @@ -/****************************************************************************** - -Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this list -of conditions and the following disclaimer. - -Redistributions in binary form must reproduce the above copyright notice, this -list of conditions and the following disclaimer in the documentation and/or -other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*******************************************************************************/ - -#ifndef TEST_CTRL_TEST_PGEN_ROCP_H_ -#define TEST_CTRL_TEST_PGEN_ROCP_H_ - -#include -#include - -#include "ctrl/test_pgen.h" -#include "util/test_assert.h" - -hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, - hsa_ven_amd_aqlprofile_info_data_t* info_data, - void* callback_data) { - hsa_status_t status = HSA_STATUS_SUCCESS; - typedef std::vector passed_data_t; - reinterpret_cast(callback_data)->push_back(*info_data); - return status; -} - -// Class implements PMC profiling -class TestPGenRocp : public TestPGen { - public: - explicit TestPGenRocp(TestAql* t) : TestPGen(t) { std::clog << "Test: PGen ROCP" << std::endl; } - - bool Initialize(int /*arg_cnt*/, char** /*arg_list*/) { - status = rocprofiler_on_dispatch(&profile_, PrePacket(), PostPacket()); - TEST_STATUS(status != HSA_STATUS_SUCCESS); - return (status == HSA_STATUS_SUCCESS); - } - - private: - bool BuildPackets() { return true; } - - bool DumpData() { - std::clog << "TestPGenRocp::DumpData :" << std::endl; - - typedef std::vector callback_data_t; - - callback_data_t data; - api_.hsa_ven_amd_aqlprofile_iterate_data(&profile_, TestPGenRocpCallback, &data); - for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) { - std::cout << std::dec << "event(block(" << it->pmc_data.event.block_name << "_" - << it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id - << ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")" - << std::endl; - } - - return true; - } -}; - -#endif // TEST_CTRL_TEST_PGEN_ROCP_H_ diff --git a/projects/rocprofiler/test/ctrl/test_pmgr.cpp b/projects/rocprofiler/test/ctrl/test_pmgr.cpp index a78d76c754..b8e8197985 100644 --- a/projects/rocprofiler/test/ctrl/test_pmgr.cpp +++ b/projects/rocprofiler/test/ctrl/test_pmgr.cpp @@ -62,7 +62,7 @@ bool TestPMgr::AddPacketGfx8(const packet_t* packet) { // Create legacy devices PM4 data const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet; slot_pm4_t data; - api_.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast(data.words)); + api_->hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast(data.words)); // Compute the write index of queue and copy Aql packet into it uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue()); @@ -128,16 +128,14 @@ bool TestPMgr::Initialize(int argc, char** argv) { hsa_status_t status = HSA_STATUS_ERROR; status = hsa_signal_create(1, 0, NULL, &post_signal_); TEST_ASSERT(status == HSA_STATUS_SUCCESS); - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api_); - TEST_ASSERT(status == HSA_STATUS_SUCCESS); + api_ = HsaRsrcFactory::Instance().AqlProfileApi();; return true; } -TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_({0}) { +TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_(NULL) { memset(&pre_packet_, 0, sizeof(pre_packet_)); memset(&post_packet_, 0, sizeof(post_packet_)); dummy_signal_.handle = 0; post_signal_ = dummy_signal_; - memset(&api_, 0, sizeof(api_)); } diff --git a/projects/rocprofiler/test/ctrl/test_pmgr.h b/projects/rocprofiler/test/ctrl/test_pmgr.h index 3998dc1f03..7e38e39655 100644 --- a/projects/rocprofiler/test/ctrl/test_pmgr.h +++ b/projects/rocprofiler/test/ctrl/test_pmgr.h @@ -47,7 +47,7 @@ class TestPMgr : public TestAql { hsa_signal_t dummy_signal_; hsa_signal_t post_signal_; - hsa_ven_amd_aqlprofile_1_00_pfn_t api_; + HsaRsrcFactory::aqlprofile_pfn_t* api_; virtual bool BuildPackets() { return false; } virtual bool DumpData() { return false; } diff --git a/projects/rocprofiler/test/ctrl/tool.cpp b/projects/rocprofiler/test/ctrl/tool.cpp index 45972f52a2..cf3acb04fa 100644 --- a/projects/rocprofiler/test/ctrl/tool.cpp +++ b/projects/rocprofiler/test/ctrl/tool.cpp @@ -320,7 +320,11 @@ CONSTRUCTOR_API void constructor() { exit(1); } printf("ROCProfiler: input from \"%s\"\n", xml_name); - xml::Xml* xml = new xml::Xml(xml_name); + xml::Xml* xml = xml::Xml::Create(xml_name); + if (xml == NULL) { + fprintf(stderr, "Input file not found '%s'\n", xml_name); + exit(1); + } // Getting metrics auto metrics_list = xml->GetNodes("top.metric"); diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp index 139b618de9..c1080ba887 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp @@ -24,6 +24,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "util/hsa_rsrc_factory.h" +#include #include #include #include @@ -116,6 +117,12 @@ HsaRsrcFactory::HsaRsrcFactory() { // Discover the set of Gpu devices available on the platform status = hsa_iterate_agents(GetHsaAgentsCallback, this); CHECK_STATUS("Error Calling hsa_iterate_agents", status); + + // Get AqlProfile API table + aqlprofile_api_ = {0}; + status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); + if (status != HSA_STATUS_SUCCESS) status = LoadAqlProfileLib(&aqlprofile_api_); + CHECK_STATUS("aqlprofile API table load failed", status); } // Destructor of the class @@ -124,6 +131,39 @@ HsaRsrcFactory::~HsaRsrcFactory() { CHECK_STATUS("Error in hsa_shut_down", status); } +hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ + + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = + (decltype(::hsa_ven_amd_aqlprofile_get_info)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) + dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data"); + + return HSA_STATUS_SUCCESS; +} + // Get the count of Hsa Gpu Agents available on the platform // // @return uint32_t Number of Gpu agents on platform diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.h b/projects/rocprofiler/test/util/hsa_rsrc_factory.h index 88235e62bf..2cab71a74d 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.h @@ -27,6 +27,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -54,7 +55,7 @@ static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; // Encapsulates information about a Hsa Agent such as its // handle, name, max queue size, max wavefront size, etc. -typedef struct { +struct AgentInfo { // Handle of Agent hsa_agent_t dev_id; @@ -78,8 +79,7 @@ typedef struct { // Memory region supporting kernel arguments hsa_region_t kernarg_region; - -} AgentInfo; +}; class HsaRsrcFactory { public: @@ -207,7 +207,14 @@ class HsaRsrcFactory { // Print the various fields of Hsa Gpu Agents bool PrintGpuAgents(const std::string& header); + // Return AqlProfile API table + typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; + const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } + private: + // Load AQL profile HSA extension library directly + static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api); + // Constructor of the class. Will initialize the Hsa Runtime and // query the system topology to get the list of Cpu and Gpu devices HsaRsrcFactory(); @@ -229,6 +236,9 @@ class HsaRsrcFactory { // Used to maintain a list of Hsa Cpu Agent Info std::vector cpu_list_; + + // AqlProfile API table + aqlprofile_pfn_t aqlprofile_api_; }; #endif // TEST_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/projects/rocprofiler/test/util/xml.h b/projects/rocprofiler/test/util/xml.h index 9fa88541e3..890c687a43 100644 --- a/projects/rocprofiler/test/util/xml.h +++ b/projects/rocprofiler/test/util/xml.h @@ -27,6 +27,33 @@ class Xml { enum { DECL_STATE, BODY_STATE }; + static Xml* Create(const char* file_name) { + Xml* xml = new Xml(file_name); + if (xml->fd_ == -1) { + delete xml; + xml = NULL; + } + return xml; + } + + static void Destroy(Xml *xml) { delete xml; } + + std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } + + void Print() const { + for (auto& elem : map_) { + for (auto node : elem.second) { + if (node->opts.size()) { + std::cout << elem.first << ":" << std::endl; + for (auto& opt : node->opts) { + std::cout << " " << opt.first << " = " << opt.second << std::endl; + } + } + } + } + } + + private: Xml(const char* file_name) : file_name_(file_name), file_line_(0), @@ -39,7 +66,7 @@ class Xml { fd_ = open(file_name, O_RDONLY); if (fd_ == -1) { - std::cout << "XML file not found: " << file_name << std::endl; + perror("open XML file"); return; } @@ -117,22 +144,8 @@ class Xml { } } - std::vector GetNodes(std::string global_tag) { return map_[global_tag]; } + ~Xml() {} - void Print() const { - for (auto& elem : map_) { - for (auto node : elem.second) { - if (node->opts.size()) { - std::cout << elem.first << ":" << std::endl; - for (auto& opt : node->opts) { - std::cout << " " << opt.first << " = " << opt.second << std::endl; - } - } - } - } - } - - private: bool LineEndCheck() { bool found = false; if (buffer_[index_] == '\n') {