diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/common.h b/projects/rocprofiler-sdk/samples/pc_sampling/common.h index d3032c6f93..ff9b7a27db 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/common.h +++ b/projects/rocprofiler-sdk/samples/pc_sampling/common.h @@ -49,20 +49,20 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, if(agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) { *_out_agent = *agents[i]; - printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", __FUNCTION__, _out_agent->name, - _out_agent->id.handle, + _out_agent->node_id, _out_agent->type, _out_agent->num_pc_sampling_configs); return ROCPROFILER_STATUS_SUCCESS; } else { - printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", __FUNCTION__, agents[i]->name, - agents[i]->id.handle, + agents[i]->node_id, agents[i]->type, agents[i]->num_pc_sampling_configs); } diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp index f9dd7df8d5..d5de95bfcf 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp @@ -38,20 +38,20 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen _out_agents->push_back(*agents[i]); - printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", __FUNCTION__, agents[i]->name, - agents[i]->id.handle, + agents[i]->node_id, agents[i]->type, agents[i]->num_pc_sampling_configs); return ROCPROFILER_STATUS_SUCCESS; } else { - printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n", + printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", __FUNCTION__, agents[i]->name, - agents[i]->id.handle, + agents[i]->node_id, agents[i]->type, agents[i]->num_pc_sampling_configs); } diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h index 43184f9cd8..a1e979e781 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h @@ -185,7 +185,10 @@ typedef struct rocprofiler_agent_t ///< do not assume the number of PC sampling configurations ///< based on the device type. const rocprofiler_pc_sampling_configuration_t* - pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types. + pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types. + uint32_t node_id; ///< Node sequence number. This will be equivalent to the HSA-runtime + ///< HSA_AMD_AGENT_INFO_DRIVER_NODE_ID property + uint32_t reserved0; ///< reserved padding } rocprofiler_agent_t; /** diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt index bad117589e..6d7626a772 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/CMakeLists.txt @@ -3,8 +3,8 @@ # rocprofiler_activate_clang_tidy() -set(ROCPROFILER_LIB_HEADERS buffer.hpp external_correlation.hpp internal_threading.hpp - registration.hpp) +set(ROCPROFILER_LIB_HEADERS agent.hpp buffer.hpp external_correlation.hpp + internal_threading.hpp registration.hpp) set(ROCPROFILER_LIB_SOURCES agent.cpp buffer.cpp diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp index 6a69c6cf45..f6287e7167 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp @@ -24,8 +24,12 @@ #include #include +#include "lib/rocprofiler/agent.hpp" +#include "lib/rocprofiler/hsa/agent_cache.hpp" + #include #include +#include #include #include @@ -161,20 +165,36 @@ get_cpu_info() return _v; } +// check to see if the file is readable +bool +is_readable(const fs::path& fpath) +{ + auto ec = std::error_code{}; + auto perms = fs::status(fpath, ec).permissions(); + LOG_IF(ERROR, ec) << fmt::format( + "Error getting status for file '{}': {}", fpath.string(), ec.message()); + return (!ec && (perms & fs::perms::owner_read) != fs::perms::none); +} + auto read_file(const std::string& fname) { auto data = std::vector{}; - auto ifs = std::ifstream{fname}; - if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + if(!is_readable(fs::path{fname})) + throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + auto ifs = std::ifstream{fname}; + if(!ifs || !ifs.good()) + throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; while(true) { auto value = std::string{}; ifs >> value; - if(ifs.eof()) break; + if(ifs.eof() || value.empty()) break; - if(!value.empty()) data.emplace_back(value); + data.emplace_back(value); } return data; @@ -184,14 +204,20 @@ auto read_map(const std::string& fname) { auto data = std::unordered_map{}; - auto ifs = std::ifstream{fname}; - if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + if(!is_readable(fs::path{fname})) + throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + auto ifs = std::ifstream{fname}; + if(!ifs || !ifs.good()) + throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + auto last_label = std::string{}; while(true) { auto label = std::string{}; ifs >> label; - if(ifs.eof()) break; + if(ifs.eof() || label.empty()) break; auto entry = std::string{}; ifs >> entry; @@ -201,7 +227,14 @@ read_map(const std::string& fname) auto ret = data.emplace(label, entry); if(!ret.second) - throw std::runtime_error{fmt::format("duplicate entry in '{}': {}", fname, label)}; + throw std::runtime_error{ + fmt::format("duplicate entry in '{}': '{}' (='{}'). last label was '{}'", + fname, + label, + entry, + last_label)}; + + if(!label.empty()) last_label = std::move(label); } return data; @@ -297,13 +330,18 @@ read_topology() const auto& cpu_info_v = get_cpu_info(); auto data = std::vector{}; - uint64_t n = 0; + uint64_t idcount = 0; + uint64_t nodecount = 0; while(true) { - auto idx = n++; + auto idx = idcount++; auto node_path = sysfs_nodes_path / std::to_string(idx); + // assumes that nodes are monotonically increasing and thus once we are missing a node + // folder for a number, there are no more nodes if(!fs::exists(node_path)) break; + // skip if we don't have permission to read the file + if(!is_readable(node_path)) continue; auto properties = std::unordered_map{}; auto name_prop = std::vector{}; @@ -320,12 +358,16 @@ read_topology() continue; } + // we may have been able to open the properties file but if it was empty, we ignore it + if(properties.empty()) continue; + auto agent_info = rocprofiler_agent_t{}; memset(&agent_info, 0, sizeof(agent_info)); agent_info.size = sizeof(rocprofiler_agent_t); agent_info.id.handle = idx; agent_info.type = ROCPROFILER_AGENT_TYPE_NONE; + agent_info.node_id = nodecount++; if(!name_prop.empty()) agent_info.model_name = strdup(name_prop.front().c_str()); @@ -568,7 +610,209 @@ get_agent_topology() static auto _v = read_topology(); return _v; } + +auto& +get_agent_caches() +{ + static auto _v = std::vector{}; + return _v; +} } // namespace + +std::vector +get_agents() +{ + auto& agents = rocprofiler::agent::get_agent_topology(); + auto pointers = std::vector{}; + pointers.reserve(agents.size()); + for(auto& agent : agents) + { + pointers.emplace_back(agent.get()); + } + return pointers; +} + +void +construct_agent_cache(::HsaApiTable* table) +{ + if(!table) return; + + auto rocp_agents = agent::get_agents(); + auto hsa_agents = std::vector{}; + + // Get HSA Agents + table->core_->hsa_iterate_agents_fn( + [](hsa_agent_t agent, void* data) { + CHECK_NOTNULL(static_cast*>(data))->emplace_back(agent); + return HSA_STATUS_SUCCESS; + }, + &hsa_agents); + + LOG_IF(FATAL, rocp_agents.size() != hsa_agents.size()) + << "Found " << rocp_agents.size() << " rocprofiler agents and " << hsa_agents.size() + << " HSA agents"; + + auto hsa_agent_node_map = std::unordered_map{}; + for(const auto& itr : hsa_agents) + { + if(uint32_t node_id = 0; + table->core_->hsa_agent_get_info_fn( + itr, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &node_id) == + HSA_STATUS_SUCCESS) + { + hsa_agent_node_map[node_id] = itr; + } + } + + auto agent_map = + std::unordered_map>{}; + for(const auto* ritr : rocp_agents) + { + for(auto hitr : hsa_agents) + { + if(uint32_t node_id = 0; + table->core_->hsa_agent_get_info_fn( + hitr, + static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), + &node_id) == HSA_STATUS_SUCCESS) + { + if(ritr->node_id == node_id) + { + agent_map.emplace(ritr->node_id, std::make_tuple(ritr, hitr)); + break; + } + } + } + } + + LOG_IF(ERROR, agent_map.size() != hsa_agents.size()) + << "rocprofiler was only able to map " << agent_map.size() + << " rocprofiler agents to HSA agents, expected " << hsa_agents.size(); + +// For Pre-ROCm 6.0 releases +#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900 +# define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113 +#endif + + auto find_nearest_hsa_cpu_agent = [&table, &agent_map](uint32_t node_id) { + auto _nearest_cpu = hsa_agent_t{.handle = 0}; + auto _hsa_agent = std::get<1>(agent_map.at(node_id)); + if(table->core_->hsa_agent_get_info_fn( + _hsa_agent, + static_cast(HSA_AMD_AGENT_INFO_NEAREST_CPU), + &_nearest_cpu) != HSA_STATUS_SUCCESS) + { + const auto* _rocp_agent = std::get<0>(agent_map.at(node_id)); + auto distance_min = std::numeric_limits::max(); + for(uint32_t i = 0; i < _rocp_agent->io_links_count; ++i) + { + const auto& io_link = _rocp_agent->io_links[i]; + auto _from = io_link.node_from; + auto _to = io_link.node_to; + + LOG_IF(FATAL, _from != node_id) + << "unexpected condition for node_id=" << node_id << ". io_link[" << i + << "].node_from=" << _from + << ". Expected this to match the node_id (node_to=" << _to << ")"; + + if(agent_map.find(_to) == agent_map.end()) + { + LOG(WARNING) << "no agent mapping for io_link[" << i << "].node_to=" << _to + << " in rocprofiler agent " << node_id; + continue; + } + + auto [_to_rocp_agent, _to_hsa_agent] = agent_map.at(_to); + auto _distance = std::abs(static_cast(_from - _to)); + if(_distance > 0 && _distance < distance_min && + _to_rocp_agent->type == ROCPROFILER_AGENT_TYPE_CPU) + { + distance_min = _distance; + _nearest_cpu = _to_hsa_agent; + } + } + } + return _nearest_cpu; + }; + + auto is_duplicate = [](const auto* agent_v) { + for(const auto& itr : get_agent_caches()) + { + if(itr == agent_v) return true; + } + return false; + }; + + // Generate supported agents + for(const auto& itr : agent_map) + { + const auto* rocp_agent = std::get<0>(itr.second); + auto hsa_agent = std::get<1>(itr.second); + if(is_duplicate(rocp_agent)) continue; + + // AgentCache is only for GPU agents + if(rocp_agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue; + + auto _nearest_cpu = find_nearest_hsa_cpu_agent(itr.first); + try + { + get_agent_caches().emplace_back( + rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_); + } catch(std::runtime_error& err) + { + if(rocp_agent->type == ROCPROFILER_AGENT_TYPE_GPU) + { + LOG(ERROR) << fmt::format("rocprofiler agent <-> HSA agent mapping failed: {} ({})", + rocp_agent->node_id, + err.what()); + } + } + } +} + +std::optional +get_hsa_agent(const rocprofiler_agent_t* agent) +{ + for(const auto& itr : get_agent_caches()) + { + if(itr == agent) return itr.get_hsa_agent(); + } + + return std::nullopt; +} + +const rocprofiler_agent_t* +get_rocprofiler_agent(hsa_agent_t agent) +{ + for(const auto& itr : get_agent_caches()) + { + if(itr == agent) return &itr.get_rocp_agent(); + } + + return nullptr; +} + +std::optional +get_agent_cache(const rocprofiler_agent_t* agent) +{ + for(const auto& itr : get_agent_caches()) + { + if(itr == agent) return itr; + } + + return std::nullopt; +} + +std::optional +get_agent_cache(hsa_agent_t agent) +{ + for(const auto& itr : get_agent_caches()) + { + if(itr == agent) return itr; + } + + return std::nullopt; +} } // namespace agent } // namespace rocprofiler @@ -585,15 +829,7 @@ rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback, return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI; } - // auto agents = get_agents(); - auto& agents = rocprofiler::agent::get_agent_topology(); - auto pointers = std::vector{}; - pointers.reserve(agents.size()); - for(auto& agent : agents) - { - pointers.emplace_back(agent.get()); - } - + auto&& pointers = rocprofiler::agent::get_agents(); return callback(pointers.data(), pointers.size(), user_data); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.hpp new file mode 100644 index 0000000000..9d271a3a51 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.hpp @@ -0,0 +1,54 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include "lib/rocprofiler/hsa/agent_cache.hpp" + +#include + +#include +#include + +namespace rocprofiler +{ +namespace agent +{ +std::vector +get_agents(); + +void +construct_agent_cache(::HsaApiTable* table); + +std::optional +get_hsa_agent(const rocprofiler_agent_t* agent); + +const rocprofiler_agent_t* +get_rocprofiler_agent(hsa_agent_t agent); + +std::optional +get_agent_cache(const rocprofiler_agent_t* agent); + +std::optional +get_agent_cache(hsa_agent_t agent); +} // namespace agent +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/packet_construct.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/packet_construct.cpp index a640ec2b37..502eaae37c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/packet_construct.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/packet_construct.cpp @@ -21,7 +21,7 @@ AQLPacketConstruct::AQLPacketConstruct(const hsa::AgentCache& agen // for the counter. for(const auto& x : metrics) { - auto query_info = get_query_info(_agent.get_agent(), x); + auto query_info = get_query_info(_agent.get_hsa_agent(), x); _metrics.emplace_back().metric = x; uint32_t event_id = std::atoi(x.event().c_str()); for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index) @@ -32,7 +32,7 @@ AQLPacketConstruct::AQLPacketConstruct(const hsa::AgentCache& agen event_id}); bool validate_event_result; LOG_IF(FATAL, - hsa_ven_amd_aqlprofile_validate_event(_agent.get_agent(), + hsa_ven_amd_aqlprofile_validate_event(_agent.get_hsa_agent(), &_metrics.back().instances.back(), &validate_event_result) != HSA_STATUS_SUCCESS); @@ -58,7 +58,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const } pkt.profile = hsa_ven_amd_aqlprofile_profile_t{ - _agent.get_agent(), + _agent.get_hsa_agent(), HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM? _events.data(), static_cast(_events.size()), @@ -69,7 +69,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const auto& profile = pkt.profile; hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; - ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_agent(), + ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_hsa_agent(), _agent.kernarg_pool(), HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, static_cast(&_access)); @@ -79,7 +79,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const { throw std::runtime_error( fmt::format("Agent {} does not allow memory pool access for counter collection", - _agent.get_agent().handle)); + _agent.get_hsa_agent().handle)); } auto throw_if_failed = [](auto status, auto& message) { @@ -113,7 +113,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const else { CHECK(*mem_loc); - hsa_agent_t agent = _agent.get_agent(); + hsa_agent_t agent = _agent.get_hsa_agent(); // Memory is accessable by both the GPU and CPU, unlock the command buffer for // sharing. LOG_IF(FATAL, @@ -167,7 +167,8 @@ AQLPacketConstruct::can_collect() iter->second++; if(inserted) { - max_allowed.emplace(block_pair, get_block_counters(_agent.get_agent(), instance)); + max_allowed.emplace(block_pair, + get_block_counters(_agent.get_hsa_agent(), instance)); } } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/tests/aql_test.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/tests/aql_test.cpp index eccd2d0a5a..22e44a01bd 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/tests/aql_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/aql/tests/aql_test.cpp @@ -60,7 +60,7 @@ TEST(aql_profile, construct_packets) auto agents = rocprofiler::hsa::get_queue_controller().get_supported_agents(); for(const auto& [_, agent] : agents) { - LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_agent().handle); + LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle); auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"}); ASSERT_EQ(metrics.size(), 1); AQLPacketConstruct(agent, metrics); @@ -82,7 +82,7 @@ TEST(aql_profile, too_many_counters) for(const auto& [_, agent] : agents) { - LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_agent().handle); + LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle); auto metrics = rocprofiler::findDeviceMetrics(agent, {}); EXPECT_THROW( diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/buffer.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/buffer.cpp index f723e107c4..9261c70d89 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/buffer.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/buffer.cpp @@ -99,8 +99,6 @@ allocate_buffer() rocprofiler_status_t flush(rocprofiler_buffer_id_t buffer_id, bool wait) { - LOG(ERROR) << "flushing..."; - if(buffer_id.handle >= get_buffers().size()) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND; auto& buff = get_buffers().at(buffer_id.handle); @@ -116,7 +114,6 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait) auto idx = buff->buffer_idx++; auto _task = [buffer_id, idx]() { - LOG(ERROR) << "executing task..."; auto& buff_v = get_buffers().at(buffer_id.handle); auto& buff_internal_v = buff_v->get_internal_buffer(idx); @@ -154,7 +151,6 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait) if(task_group) { - LOG(ERROR) << "executing task..."; task_group->exec(_task); if(wait) task_group->wait(); } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters.cpp index d62b0e9797..12b8a1c00d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters.cpp @@ -81,7 +81,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, *instance_count = std::max(size_t(1), *instance_count); continue; } - auto query_info = rocprofiler::aql::get_query_info(maybe_agent->get_agent(), counter); + auto query_info = rocprofiler::aql::get_query_info(maybe_agent->get_hsa_agent(), counter); *instance_count = std::max(static_cast(query_info.instance_count), *instance_count); } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.cpp index 721a77e899..7b4cb6b9cd 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.cpp @@ -23,16 +23,13 @@ #include #include #include +#include #include +#include #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" -// For Pre-ROCm 6.0 releases -#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900 -# define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113 -#endif - namespace { // This function checks to see if the provided @@ -118,7 +115,7 @@ init_gpu_pool(const AmdExtTable& api, rocprofiler::hsa::AgentCache& agent) std::pair params = std::make_pair(&api, &agent.gpu_pool()); auto status = - api.hsa_amd_agent_iterate_memory_pools_fn(agent.get_agent(), FindStandardPool, ¶ms); + api.hsa_amd_agent_iterate_memory_pools_fn(agent.get_hsa_agent(), FindStandardPool, ¶ms); if(status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { @@ -132,82 +129,27 @@ namespace rocprofiler { namespace hsa { -AgentCache::AgentCache(rocprofiler_agent_t agent_t, - size_t index, - const ::CoreApiTable& table, - const AmdExtTable& ext) -: _agent_t(agent_t) -, _index(index) -, _name(agent_t.name) +AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent, + hsa_agent_t hsa_agent, + size_t index, + hsa_agent_t nearest_cpu, + const AmdExtTable& ext_table) +: m_rocp_agent{rocp_agent} +, m_index{index} +, m_hsa_agent{hsa_agent} +, m_nearest_cpu{nearest_cpu} +, m_name{rocp_agent->name} { - // Get HSA Agents - std::vector agents; - table.hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - CHECK_NOTNULL(static_cast*>(data))->emplace_back(agent); - return HSA_STATUS_SUCCESS; - }, - &agents); - - // In case HSA_AMD_AGENT_INFO_NEAREST_CPU is non-functional, default to original v1 behavior - // of last CPU agent being nearest. - std::optional last_cpu; - - bool found = false; - // Find the HSA agent that is represented by rocprofiler_agent_t - for(const auto& agent : agents) - { - hsa_device_type_t type = HSA_DEVICE_TYPE_CPU; - if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) - { - throw std::runtime_error("hsa_agent_get_info failed to find device"); - } - - if(type != HSA_DEVICE_TYPE_GPU) - { - if(type == HSA_DEVICE_TYPE_CPU && !last_cpu) last_cpu = agent; - continue; - } - - uint32_t node_id = 0; - if(table.hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &node_id) != - HSA_STATUS_SUCCESS) - { - throw std::runtime_error("hsa_agent_get_info failed to find driver id"); - } - - // Match rocprofiler_agent_t to hsa_agent for GPU agents - if(_index != node_id) continue; - - if(table.hsa_agent_get_info_fn( - agent, - static_cast(HSA_AMD_AGENT_INFO_NEAREST_CPU), - &_nearest_cpu) != HSA_STATUS_SUCCESS) - { - _nearest_cpu = last_cpu ? *last_cpu : hsa_agent_t{.handle = 0}; - } - - found = true; - _agent = agent; - } - - if(!found) - { - throw std::runtime_error(fmt::format("Could not find GPU id = {}", agent_t.id.handle)); - } - // Construct CPU/GPU pools - try { - init_cpu_pool(ext, *this); - init_gpu_pool(ext, *this); + init_cpu_pool(ext_table, *this); + init_gpu_pool(ext_table, *this); } catch(std::runtime_error& e) { LOG(WARNING) << fmt::format( - "Buffer creation for Agent {} failed ({}), Some profiling options will be unavialable.", - agent_t.id.handle, + "Buffer creation for Agent {} failed ({}), Some profiling options will be unavailable.", + rocp_agent->node_id, e.what()); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.hpp index 4184081592..7ea941384f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent_cache.hpp @@ -53,34 +53,59 @@ static const uint32_t LDS_BLOCK_SIZE = 128 * 4; class AgentCache { public: - AgentCache(rocprofiler_agent_t, size_t index, const ::CoreApiTable&, const AmdExtTable&); + AgentCache(const rocprofiler_agent_t* rocp_agent, + hsa_agent_t hsa_agent, + size_t index, + hsa_agent_t nearest_cpu, + const AmdExtTable& ext_table); + ~AgentCache() = default; + AgentCache(const AgentCache&) = default; + AgentCache(AgentCache&&) noexcept = default; + + AgentCache& operator=(const AgentCache&) = default; + AgentCache& operator=(AgentCache&&) noexcept = default; // Provides const and a non-const accessor functions. - CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, cpu_pool, _cpu_pool); - CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, kernarg_pool, _kernarg_pool); - CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, gpu_pool, _gpu_pool); - CONST_NONCONST_ACCESSOR(rocprofiler_agent_t, agent_t, _agent_t); - CONST_NONCONST_ACCESSOR(hsa_agent_t, get_agent, _agent); - CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, _nearest_cpu); + CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, cpu_pool, m_cpu_pool); + CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, kernarg_pool, m_kernarg_pool); + CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, gpu_pool, m_gpu_pool); + CONST_NONCONST_ACCESSOR(hsa_agent_t, get_hsa_agent, m_hsa_agent); + CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, m_nearest_cpu); - const std::string& name() const { return _name; } + const rocprofiler_agent_t& get_rocp_agent() const { return *m_rocp_agent; } + std::string_view name() const { return m_name; } + size_t index() const { return m_index; } + + bool operator==(const rocprofiler_agent_t*) const; + bool operator==(hsa_agent_t) const; private: // Agent info - rocprofiler_agent_t _agent_t; - size_t _index{0}; // rocprofiler_agent index + const rocprofiler_agent_t* m_rocp_agent = nullptr; + size_t m_index{0}; // rocprofiler_agent index // GPU Agent - hsa_agent_t _agent{.handle = 0}; - hsa_agent_t _nearest_cpu{.handle = 0}; + hsa_agent_t m_hsa_agent{.handle = 0}; + hsa_agent_t m_nearest_cpu{.handle = 0}; // memory pools - hsa_amd_memory_pool_t _cpu_pool{.handle = 0}; - hsa_amd_memory_pool_t _kernarg_pool{.handle = 0}; - hsa_amd_memory_pool_t _gpu_pool{.handle = 0}; + hsa_amd_memory_pool_t m_cpu_pool{.handle = 0}; + hsa_amd_memory_pool_t m_kernarg_pool{.handle = 0}; + hsa_amd_memory_pool_t m_gpu_pool{.handle = 0}; - std::string _name; + std::string_view m_name = {}; }; +inline bool +AgentCache::operator==(const rocprofiler_agent_t* agent) const +{ + return (agent == m_rocp_agent); +} + +inline bool +AgentCache::operator==(hsa_agent_t agent) const +{ + return (agent.handle == m_hsa_agent.handle); +} } // namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp index d2534353f3..94a2bd775a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue.cpp @@ -307,7 +307,7 @@ Queue::Queue(const AgentCache& agent, { LOG_IF(FATAL, - _ext_api.hsa_amd_queue_intercept_create_fn(_agent.get_agent(), + _ext_api.hsa_amd_queue_intercept_create_fn(_agent.get_hsa_agent(), size, type, callback, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue_controller.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue_controller.cpp index ee82557d0c..eba0fa9be5 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue_controller.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/queue_controller.cpp @@ -19,7 +19,11 @@ // THE SOFTWARE. #include "lib/rocprofiler/hsa/queue_controller.hpp" +#include "lib/rocprofiler/agent.hpp" #include "lib/rocprofiler/context/context.hpp" +#include "lib/rocprofiler/hsa/agent_cache.hpp" + +#include #include @@ -42,7 +46,7 @@ create_queue(hsa_agent_t agent, { for(const auto& [_, agent_info] : get_queue_controller().get_supported_agents()) { - if(agent_info.get_agent().handle == agent.handle) + if(agent_info.get_hsa_agent().handle == agent.handle) { auto new_queue = std::make_unique(agent_info, size, @@ -76,7 +80,7 @@ QueueController::add_queue(hsa_queue_t* id, std::unique_ptr queue) CHECK(queue); _callback_cache.wlock([&](auto& callbacks) { _queues.wlock([&](auto& map) { - const auto agent_id = queue->get_agent().agent_t().id.handle; + const auto agent_id = queue->get_agent().get_rocp_agent().id.handle; map[id] = std::move(queue); for(const auto& [cbid, cb_tuple] : callbacks) { @@ -110,7 +114,7 @@ QueueController::add_callback(const rocprofiler_agent_t& agent, _queues.wlock([&](auto& map) { for(auto& [_, queue] : map) { - if(queue->get_agent().agent_t().id.handle == agent.id.handle) + if(queue->get_agent().get_rocp_agent().id.handle == agent.id.handle) { queue->register_callback(return_id, qcb, ccb); } @@ -140,31 +144,17 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) _core_table = core_table; _ext_table = ext_table; + auto agents = agent::get_agents(); + // Generate supported agents - rocprofiler_query_available_agents( - [](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) { - CHECK(user_data); - QueueController& queue = *reinterpret_cast(user_data); - for(size_t i = 0; i < num_agents; i++) - { - const auto& agent = *agents[i]; - if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue; - try - { - queue.get_supported_agents().emplace( - i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()}); - } catch(std::runtime_error& error) - { - LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not " - "be intercepted): {} ({})", - agent.id.handle, - error.what()); - } - } - return ROCPROFILER_STATUS_SUCCESS; - }, - sizeof(rocprofiler_agent_t), - this); + for(const auto* itr : agents) + { + auto cached_agent = agent::get_agent_cache(itr); + if(cached_agent && cached_agent->get_rocp_agent().type == ROCPROFILER_AGENT_TYPE_GPU) + { + get_supported_agents().emplace(cached_agent->index(), *cached_agent); + } + } auto enable_intercepter = false; for(const auto& itr : context::get_registered_contexts()) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp index 800270b5b0..76b79d4981 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "lib/rocprofiler/registration.hpp" +#include "lib/rocprofiler/agent.hpp" #include "lib/rocprofiler/context/context.hpp" #include "lib/rocprofiler/hsa/hsa.hpp" #include "lib/rocprofiler/hsa/queue.hpp" @@ -541,6 +542,9 @@ rocprofiler_set_api_table(const char* name, << " rocprofiler expected HSA library to pass 1 API table, not " << num_tables; auto* hsa_api_table = static_cast(*tables); + + // need to construct agent mappings before initializing the queue controller + rocprofiler::agent::construct_agent_cache(hsa_api_table); rocprofiler::hsa::queue_controller_init(hsa_api_table); // any internal modifications to the HsaApiTable need to be done before we make the diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp index e75647efb5..13bce3db26 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp @@ -99,13 +99,17 @@ TEST(rocprofiler_lib, agent_abi) EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, num_pc_sampling_configs), 280) << msg; EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 288) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, node_id), 296) << msg; + EXPECT_EQ(offsetof(rocprofiler_agent_t, reserved0), 300) << msg; // Add test for offset of new field above this. Do NOT change any existing values! + constexpr auto expected_rocp_agent_size = 304; // If a new field is added, increase this value by the size of the new field(s) - EXPECT_EQ(sizeof(rocprofiler_agent_t), 296) + EXPECT_EQ(sizeof(rocprofiler_agent_t), expected_rocp_agent_size) << "ABI break. If you added a new field, make sure that this is the only new check that " "failed. Please add a check for the new field at the offset and update this test to the " "new size"; + static_assert(sizeof(rocprofiler_agent_t) == expected_rocp_agent_size, "Update agent size!"); } TEST(rocprofiler_lib, agent) @@ -115,10 +119,18 @@ TEST(rocprofiler_lib, agent) auto info_ret = std::system("/usr/bin/rocminfo"); EXPECT_EQ(info_ret, 0); - auto sys_ret = std::system( + std::cout << "# Data from '/sys/class/kfd/kfd/topology/nodes': \n" << std::flush; + auto sys_ret_kfd = std::system( "/bin/bash -c 'for i in $(find /sys/class/kfd/kfd/topology/nodes -maxdepth 2 -type f | " "grep properties | sort); do echo -e \"\n##### ${i} #####\n\"; cat ${i}; echo \"\"; done'"); - EXPECT_EQ(sys_ret, 0); + EXPECT_EQ(sys_ret_kfd, 0); + + std::cout << "# Data from '/sys/devices/virtual/kfd/kfd/topology/nodes': \n" << std::flush; + auto sys_ret_virt = + std::system("/bin/bash -c 'for i in $(find /sys/devices/virtual/kfd/kfd/topology/nodes " + "-maxdepth 2 -type f | grep properties | sort); do echo -e \"\n##### ${i} " + "#####\n\"; cat ${i}; echo \"\"; done'"); + EXPECT_EQ(sys_ret_virt, 0); auto agents = std::vector{}; rocprofiler_available_agents_cb_t iterate_cb = @@ -133,6 +145,7 @@ TEST(rocprofiler_lib, agent) return ROCPROFILER_STATUS_SUCCESS; }; + std::cout << "# querying available agents...\n" << std::flush; auto status = rocprofiler_query_available_agents(iterate_cb, sizeof(rocprofiler_agent_t), @@ -154,7 +167,7 @@ TEST(rocprofiler_lib, agent) agent->name, agent->model_name, agent->gfx_target_version, - agent->id.handle, + agent->node_id, agent->type == ROCPROFILER_AGENT_TYPE_CPU ? "CPU" : "GPU"); // std::cout << msg << std::endl; @@ -182,6 +195,9 @@ TEST(rocprofiler_lib, agent) EXPECT_EQ(std::string_view{agent->product_name}, std::string_view{hsa_agent->device_mkt_name}) << msg; + EXPECT_EQ(agent->node_id, hsa_agent->internal_node_id) << msg; + EXPECT_EQ(agent->location_id, hsa_agent->bdf_id) << msg; + EXPECT_EQ(agent->device_id, hsa_agent->chip_id) << msg; EXPECT_EQ(agent->simd_count, hsa_agent->compute_unit * hsa_agent->simds_per_cu) << msg; EXPECT_EQ(agent->cu_count, hsa_agent->compute_unit) << msg; EXPECT_EQ(agent->simd_per_cu, hsa_agent->simds_per_cu) << msg;