diff --git a/projects/rocprofiler/Readme.txt b/projects/rocprofiler/Readme.txt index e9d044b0ec..6ab4a3bc82 100644 --- a/projects/rocprofiler/Readme.txt +++ b/projects/rocprofiler/Readme.txt @@ -1,8 +1,8 @@ ROC Profiler library. -Profiling with metrics and traces based perfcounters (PMC) and SQ threadtraces (SQTT) -and AqlProfile HSA extension. +Profiling with metrics and traces based on perfcounters (PMC) and SQ threadtraces (SQTT). +Implementation is based on AqlProfile HSA extension. +Library supports GFX8/GFX9. -Library supports GFX8/GFX9 APIs. The library source tree: - doc - Documentation - inc/rocprofiler.h - Library public API diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index dbba323f68..3948ec2391 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -17,6 +17,10 @@ #include "util/hsa_rsrc_factory.h" #include "util/logger.h" +#ifndef AQL_PROFILE_READ_API_ENABLE +#define AQL_PROFILE_READ_API_ENABLE 0 +#endif + namespace rocprofiler { struct rocprofiler_contex_t; class Context; @@ -310,7 +314,8 @@ class Context { submit_queue->Submit(&stop_packets[0], stop_packets.size()); } void Read(const uint32_t& group_index, Queue* const queue = NULL) { - const pkt_vector_t& read_packets = StopPackets(group_index); + const pkt_vector_t& read_packets = ReadPackets(group_index); + if (read_packets.size() == 0) EXC_RAISING(HSA_STATUS_ERROR, "Read API disabled"); Queue* const submit_queue = (queue != NULL) ? queue : queue_; submit_queue->Submit(&read_packets[0], read_packets.size()); } diff --git a/projects/rocprofiler/src/core/profile.h b/projects/rocprofiler/src/core/profile.h index 90981ee621..1e13300c7f 100644 --- a/projects/rocprofiler/src/core/profile.h +++ b/projects/rocprofiler/src/core/profile.h @@ -10,10 +10,6 @@ #include "util/exception.h" #include "util/hsa_rsrc_factory.h" -#ifndef AQL_PROFILE_READ_API_ENABLE -#define AQL_PROFILE_READ_API_ENABLE 0 -#endif - namespace rocprofiler { struct profile_info_t { const event_t* event; @@ -86,6 +82,7 @@ class Profile { completion_signal_ = {}; is_legacy_ = (strncmp(agent_info->name, "gfx8", 4) == 0); } + virtual ~Profile() { info_vector_.clear(); if (profile_.command_buffer.ptr) util::HsaRsrcFactory::FreeMemory(profile_.command_buffer.ptr); @@ -115,18 +112,18 @@ class Profile { if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start(NULL)"); status = Allocate(rsrc); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "Allocate()"); - // Generate start/stop profiling packets + + // Generate start/stop/read profiling packets status = api->hsa_ven_amd_aqlprofile_start(&profile_, &start); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start"); status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop"); + hsa_status_t rd_status = api->hsa_ven_amd_aqlprofile_read(&profile_, &read); +#if 0 // Read API returns error if disabled + if (rd_status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_read"); +#endif -#if AQL_PROFILE_READ_API_ENABLE - status = api->hsa_ven_amd_aqlprofile_read(&profile_, &read); - if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_read"); -#endif // AQL_PROFILE_READ_API_ENABLE - - // Set completion signals + // Set completion signal hsa_signal_t dummy_signal{}; dummy_signal.handle = 0; start.completion_signal = dummy_signal; @@ -137,6 +134,7 @@ class Profile { read.completion_signal = post_signal; completion_signal_ = post_signal; + // Fill packet vectors if (is_legacy_) { const uint32_t start_index = start_vector.size(); const uint32_t stop_index = stop_vector.size(); @@ -148,23 +146,26 @@ class Profile { &start, reinterpret_cast(&start_vector[start_index])); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4( &stop, reinterpret_cast(&stop_vector[stop_index])); if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); -#if AQL_PROFILE_READ_API_ENABLE - const uint32_t read_index = read_vector.size(); - read_vector.insert(read_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); - status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4( - &read, reinterpret_cast(&read_vector[read_index])); - if (status != HSA_STATUS_SUCCESS) - AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); -#endif // AQL_PROFILE_READ_API_ENABLE + if (rd_status == HSA_STATUS_SUCCESS) { + const uint32_t read_index = read_vector.size(); + read_vector.insert(read_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4( + &read, reinterpret_cast(&read_vector[read_index])); + if (status != HSA_STATUS_SUCCESS) + AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + } } else { start_vector.push_back(start); stop_vector.push_back(stop); - read_vector.push_back(read); + if (rd_status == HSA_STATUS_SUCCESS) { + read_vector.push_back(read); + } } } diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp index 47173b38c0..4aeae33d1e 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp @@ -44,10 +44,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#ifndef AQL_PROFILE_READ_API_ENABLE -#define AQL_PROFILE_READ_API_ENABLE 0 -#endif - namespace rocprofiler { namespace util { @@ -166,10 +162,8 @@ hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); api->hsa_ven_amd_aqlprofile_stop = (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); -#if AQL_PROFILE_READ_API_ENABLE api->hsa_ven_amd_aqlprofile_read = (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); -#endif // AQL_PROFILE_READ_API_ENABLE api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp index 3573afc8c5..03eecd4970 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp @@ -44,11 +44,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#ifndef AQL_PROFILE_READ_API_ENABLE -#define AQL_PROFILE_READ_API_ENABLE 0 -#endif - - // Callback function to get available in the system agents hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) { hsa_status_t status = HSA_STATUS_ERROR; @@ -164,10 +159,8 @@ hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); api->hsa_ven_amd_aqlprofile_stop = (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); -#if AQL_PROFILE_READ_API_ENABLE api->hsa_ven_amd_aqlprofile_read = (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); -#endif // AQL_PROFILE_READ_API_ENABLE api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");