diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp index b24850eeae..2d64bae0b5 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.cpp @@ -134,13 +134,13 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_LOADER, 1, 0, &loader_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer @@ -527,6 +527,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Print the various fields of Hsa Gpu Agents bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::cout << std::flush; std::clog << header << " :" << std::endl; const AgentInfo* agent_info; diff --git a/projects/rocprofiler/src/util/hsa_rsrc_factory.h b/projects/rocprofiler/src/util/hsa_rsrc_factory.h index d269ceb2fb..9997a81c24 100644 --- a/projects/rocprofiler/src/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/src/util/hsa_rsrc_factory.h @@ -123,7 +123,7 @@ struct AgentInfo { // HSA timer class // Provides current HSA timestampa and system-clock/ns conversion API class HsaTimer { - public: + public: typedef uint64_t timestamp_t; static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; @@ -136,8 +136,12 @@ class HsaTimer { } // Methids for system-clock/ns conversion - timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { return timestamp_t((freq_t)sysclock * sysclock_factor_); } - timestamp_t ns_to_sysclock(const timestamp_t& time) const { return timestamp_t((freq_t)time / sysclock_factor_); } + timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { + return timestamp_t((freq_t)sysclock * sysclock_factor_); + } + timestamp_t ns_to_sysclock(const timestamp_t& time) const { + return timestamp_t((freq_t)time / sysclock_factor_); + } // Return timestamp in 'ns' timestamp_t timestamp_ns() const { @@ -147,7 +151,7 @@ class HsaTimer { return sysclock_to_ns(sysclock); } - private: + private: // Timestamp frequency factor freq_t sysclock_factor_; }; @@ -273,7 +277,7 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); // Return AqlProfile API table - typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; + typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } // Return Loader API table diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp index 5404608b5f..0293c6c474 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.cpp @@ -128,13 +128,13 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_LOADER, 1, 0, &loader_api_); + status = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer @@ -520,6 +520,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Print the various fields of Hsa Gpu Agents bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::cout << std::flush; std::clog << header << " :" << std::endl; const AgentInfo* agent_info; @@ -543,7 +544,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { } uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { - const uint32_t slot_size_b = 0x40; + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; // adevance command queue const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue); @@ -571,7 +572,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { } uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { - const uint32_t slot_size_b = 0x40; + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; if ((size_bytes & (slot_size_b - 1)) != 0) { fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); abort(); diff --git a/projects/rocprofiler/test/util/hsa_rsrc_factory.h b/projects/rocprofiler/test/util/hsa_rsrc_factory.h index c9466f89e4..738a8e2fd8 100644 --- a/projects/rocprofiler/test/util/hsa_rsrc_factory.h +++ b/projects/rocprofiler/test/util/hsa_rsrc_factory.h @@ -121,7 +121,7 @@ struct AgentInfo { // HSA timer class // Provides current HSA timestampa and system-clock/ns conversion API class HsaTimer { - public: + public: typedef uint64_t timestamp_t; static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; @@ -134,8 +134,12 @@ class HsaTimer { } // Methids for system-clock/ns conversion - timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { return timestamp_t((freq_t)sysclock * sysclock_factor_); } - timestamp_t ns_to_sysclock(const timestamp_t& time) const { return timestamp_t((freq_t)time / sysclock_factor_); } + timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { + return timestamp_t((freq_t)sysclock * sysclock_factor_); + } + timestamp_t ns_to_sysclock(const timestamp_t& time) const { + return timestamp_t((freq_t)time / sysclock_factor_); + } // Return timestamp in 'ns' timestamp_t timestamp_ns() const { @@ -145,13 +149,14 @@ class HsaTimer { return sysclock_to_ns(sysclock); } - private: + private: // Timestamp frequency factor freq_t sysclock_factor_; }; class HsaRsrcFactory { public: + static const size_t CMD_SLOT_SIZE_B = 0x40; typedef std::recursive_mutex mutex_t; typedef HsaTimer::timestamp_t timestamp_t; @@ -270,7 +275,7 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); // Return AqlProfile API table - typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; + typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } // Return Loader API table