Separate agent cache from queue controller (#145)
* Update lib/rocprofiler/agent.{hpp,cpp}
- get_agents() function for internal access to agent pointers
* Update AgentCache
- rename member variables and member functions so they clearly distinguish between the HSA agent and the rocprofiler agent
* Change ctor of AgentCache
* Update lib/rocprofiler/hsa/queue_controller.cpp
- QueueController::init uses agent::get_agent_cache
* Update lib/rocprofiler/hsa/agent_cache.*
- member function to get index
- operator== for rocprofiler_agent_t and hsa_agent_t
- removed hsa_iterate_agents from ctor (now in agent.cpp)
* Update lib/rocprofiler/agent.*
- construct_agent_cache function
- functions for rocprofiler agent <-> HSA agent
- functions for getting agent cache
* Update lib/rocprofiler/registration.cpp
- invoke construct_agent_cache when HSA table is received
* Update lib/rocprofiler/agent.cpp
- loosen failure conditions
- handle spurious duplicate entry warning
* Update lib/rocprofiler/agent.cpp
- improve read_map diagnostics
* Update lib/rocprofiler/agent.cpp
- avoid infinite loop in read_map
* Update lib/rocprofiler/agent.cpp
- handle empty kfd node properties file
* Update lib/rocprofiler/agent.cpp
- check for permissions to read a node properties file
* Update lib/rocprofiler/agent.cpp
- more checks on file readability
* Update lib/rocprofiler/tests/agent.cpp
- print virtual kfd topology
* Update lib/rocprofiler/tests/agent.cpp
- verify id.handle == hsa_agent internal node id
* Update lib/rocprofiler/tests/agent.cpp
- check node_id
- check location id
- check device id
- update abi test
* Update include/rocprofiler/agent.h
- add node_id field
- add reserved0 field to ensure new field increases struct size
* Update lib/rocprofiler/agent.cpp
- node_id instead of id.handle
* Update lib/rocprofiler/agent_cache.cpp
- node_id instead of id.handle
* Update samples/pc_sampling
- node_id for agent instead of id.handle
* Update lib/rocprofiler/buffer.cpp
- remove debug prints
[ROCm/rocprofiler-sdk commit: 7f631de401]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
6c26870c8c
Коммит
178bb0c300
@@ -49,20 +49,20 @@ find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents,
|
||||
if(agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
*_out_agent = *agents[i];
|
||||
printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
|
||||
printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n",
|
||||
__FUNCTION__,
|
||||
_out_agent->name,
|
||||
_out_agent->id.handle,
|
||||
_out_agent->node_id,
|
||||
_out_agent->type,
|
||||
_out_agent->num_pc_sampling_configs);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
|
||||
printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n",
|
||||
__FUNCTION__,
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->node_id,
|
||||
agents[i]->type,
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
}
|
||||
|
||||
@@ -38,20 +38,20 @@ find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agen
|
||||
|
||||
_out_agents->push_back(*agents[i]);
|
||||
|
||||
printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
|
||||
printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n",
|
||||
__FUNCTION__,
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->node_id,
|
||||
agents[i]->type,
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
|
||||
printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n",
|
||||
__FUNCTION__,
|
||||
agents[i]->name,
|
||||
agents[i]->id.handle,
|
||||
agents[i]->node_id,
|
||||
agents[i]->type,
|
||||
agents[i]->num_pc_sampling_configs);
|
||||
}
|
||||
|
||||
@@ -185,7 +185,10 @@ typedef struct rocprofiler_agent_t
|
||||
///< do not assume the number of PC sampling configurations
|
||||
///< based on the device type.
|
||||
const rocprofiler_pc_sampling_configuration_t*
|
||||
pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types.
|
||||
pc_sampling_configs; ///< GPU only. Array of PC sampling configuration types.
|
||||
uint32_t node_id; ///< Node sequence number. This will be equivalent to the HSA-runtime
|
||||
///< HSA_AMD_AGENT_INFO_DRIVER_NODE_ID property
|
||||
uint32_t reserved0; ///< reserved padding
|
||||
} rocprofiler_agent_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
#
|
||||
rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(ROCPROFILER_LIB_HEADERS buffer.hpp external_correlation.hpp internal_threading.hpp
|
||||
registration.hpp)
|
||||
set(ROCPROFILER_LIB_HEADERS agent.hpp buffer.hpp external_correlation.hpp
|
||||
internal_threading.hpp registration.hpp)
|
||||
set(ROCPROFILER_LIB_SOURCES
|
||||
agent.cpp
|
||||
buffer.cpp
|
||||
|
||||
@@ -24,8 +24,12 @@
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler/agent.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <glog/logging.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <libdrm/amdgpu.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
@@ -161,20 +165,36 @@ get_cpu_info()
|
||||
return _v;
|
||||
}
|
||||
|
||||
// check to see if the file is readable
|
||||
bool
|
||||
is_readable(const fs::path& fpath)
|
||||
{
|
||||
auto ec = std::error_code{};
|
||||
auto perms = fs::status(fpath, ec).permissions();
|
||||
LOG_IF(ERROR, ec) << fmt::format(
|
||||
"Error getting status for file '{}': {}", fpath.string(), ec.message());
|
||||
return (!ec && (perms & fs::perms::owner_read) != fs::perms::none);
|
||||
}
|
||||
|
||||
auto
|
||||
read_file(const std::string& fname)
|
||||
{
|
||||
auto data = std::vector<std::string>{};
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
if(!is_readable(fs::path{fname}))
|
||||
throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs || !ifs.good())
|
||||
throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
while(true)
|
||||
{
|
||||
auto value = std::string{};
|
||||
ifs >> value;
|
||||
if(ifs.eof()) break;
|
||||
if(ifs.eof() || value.empty()) break;
|
||||
|
||||
if(!value.empty()) data.emplace_back(value);
|
||||
data.emplace_back(value);
|
||||
}
|
||||
|
||||
return data;
|
||||
@@ -184,14 +204,20 @@ auto
|
||||
read_map(const std::string& fname)
|
||||
{
|
||||
auto data = std::unordered_map<std::string, std::string>{};
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
if(!is_readable(fs::path{fname}))
|
||||
throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs || !ifs.good())
|
||||
throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
auto last_label = std::string{};
|
||||
while(true)
|
||||
{
|
||||
auto label = std::string{};
|
||||
ifs >> label;
|
||||
if(ifs.eof()) break;
|
||||
if(ifs.eof() || label.empty()) break;
|
||||
|
||||
auto entry = std::string{};
|
||||
ifs >> entry;
|
||||
@@ -201,7 +227,14 @@ read_map(const std::string& fname)
|
||||
|
||||
auto ret = data.emplace(label, entry);
|
||||
if(!ret.second)
|
||||
throw std::runtime_error{fmt::format("duplicate entry in '{}': {}", fname, label)};
|
||||
throw std::runtime_error{
|
||||
fmt::format("duplicate entry in '{}': '{}' (='{}'). last label was '{}'",
|
||||
fname,
|
||||
label,
|
||||
entry,
|
||||
last_label)};
|
||||
|
||||
if(!label.empty()) last_label = std::move(label);
|
||||
}
|
||||
|
||||
return data;
|
||||
@@ -297,13 +330,18 @@ read_topology()
|
||||
|
||||
const auto& cpu_info_v = get_cpu_info();
|
||||
auto data = std::vector<unique_agent_t>{};
|
||||
uint64_t n = 0;
|
||||
uint64_t idcount = 0;
|
||||
uint64_t nodecount = 0;
|
||||
|
||||
while(true)
|
||||
{
|
||||
auto idx = n++;
|
||||
auto idx = idcount++;
|
||||
auto node_path = sysfs_nodes_path / std::to_string(idx);
|
||||
// assumes that nodes are monotonically increasing and thus once we are missing a node
|
||||
// folder for a number, there are no more nodes
|
||||
if(!fs::exists(node_path)) break;
|
||||
// skip if we don't have permission to read the file
|
||||
if(!is_readable(node_path)) continue;
|
||||
|
||||
auto properties = std::unordered_map<std::string, std::string>{};
|
||||
auto name_prop = std::vector<std::string>{};
|
||||
@@ -320,12 +358,16 @@ read_topology()
|
||||
continue;
|
||||
}
|
||||
|
||||
// we may have been able to open the properties file but if it was empty, we ignore it
|
||||
if(properties.empty()) continue;
|
||||
|
||||
auto agent_info = rocprofiler_agent_t{};
|
||||
memset(&agent_info, 0, sizeof(agent_info));
|
||||
|
||||
agent_info.size = sizeof(rocprofiler_agent_t);
|
||||
agent_info.id.handle = idx;
|
||||
agent_info.type = ROCPROFILER_AGENT_TYPE_NONE;
|
||||
agent_info.node_id = nodecount++;
|
||||
|
||||
if(!name_prop.empty())
|
||||
agent_info.model_name = strdup(name_prop.front().c_str());
|
||||
@@ -568,7 +610,209 @@ get_agent_topology()
|
||||
static auto _v = read_topology();
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_agent_caches()
|
||||
{
|
||||
static auto _v = std::vector<hsa::AgentCache>{};
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<const rocprofiler_agent_t*>
|
||||
get_agents()
|
||||
{
|
||||
auto& agents = rocprofiler::agent::get_agent_topology();
|
||||
auto pointers = std::vector<const rocprofiler_agent_t*>{};
|
||||
pointers.reserve(agents.size());
|
||||
for(auto& agent : agents)
|
||||
{
|
||||
pointers.emplace_back(agent.get());
|
||||
}
|
||||
return pointers;
|
||||
}
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table)
|
||||
{
|
||||
if(!table) return;
|
||||
|
||||
auto rocp_agents = agent::get_agents();
|
||||
auto hsa_agents = std::vector<hsa_agent_t>{};
|
||||
|
||||
// Get HSA Agents
|
||||
table->core_->hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* data) {
|
||||
CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_agents);
|
||||
|
||||
LOG_IF(FATAL, rocp_agents.size() != hsa_agents.size())
|
||||
<< "Found " << rocp_agents.size() << " rocprofiler agents and " << hsa_agents.size()
|
||||
<< " HSA agents";
|
||||
|
||||
auto hsa_agent_node_map = std::unordered_map<uint32_t, hsa_agent_t>{};
|
||||
for(const auto& itr : hsa_agents)
|
||||
{
|
||||
if(uint32_t node_id = 0;
|
||||
table->core_->hsa_agent_get_info_fn(
|
||||
itr, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &node_id) ==
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
hsa_agent_node_map[node_id] = itr;
|
||||
}
|
||||
}
|
||||
|
||||
auto agent_map =
|
||||
std::unordered_map<uint32_t, std::tuple<const rocprofiler_agent_t*, hsa_agent_t>>{};
|
||||
for(const auto* ritr : rocp_agents)
|
||||
{
|
||||
for(auto hitr : hsa_agents)
|
||||
{
|
||||
if(uint32_t node_id = 0;
|
||||
table->core_->hsa_agent_get_info_fn(
|
||||
hitr,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&node_id) == HSA_STATUS_SUCCESS)
|
||||
{
|
||||
if(ritr->node_id == node_id)
|
||||
{
|
||||
agent_map.emplace(ritr->node_id, std::make_tuple(ritr, hitr));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG_IF(ERROR, agent_map.size() != hsa_agents.size())
|
||||
<< "rocprofiler was only able to map " << agent_map.size()
|
||||
<< " rocprofiler agents to HSA agents, expected " << hsa_agents.size();
|
||||
|
||||
// For Pre-ROCm 6.0 releases
|
||||
#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900
|
||||
# define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113
|
||||
#endif
|
||||
|
||||
auto find_nearest_hsa_cpu_agent = [&table, &agent_map](uint32_t node_id) {
|
||||
auto _nearest_cpu = hsa_agent_t{.handle = 0};
|
||||
auto _hsa_agent = std::get<1>(agent_map.at(node_id));
|
||||
if(table->core_->hsa_agent_get_info_fn(
|
||||
_hsa_agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NEAREST_CPU),
|
||||
&_nearest_cpu) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
const auto* _rocp_agent = std::get<0>(agent_map.at(node_id));
|
||||
auto distance_min = std::numeric_limits<int32_t>::max();
|
||||
for(uint32_t i = 0; i < _rocp_agent->io_links_count; ++i)
|
||||
{
|
||||
const auto& io_link = _rocp_agent->io_links[i];
|
||||
auto _from = io_link.node_from;
|
||||
auto _to = io_link.node_to;
|
||||
|
||||
LOG_IF(FATAL, _from != node_id)
|
||||
<< "unexpected condition for node_id=" << node_id << ". io_link[" << i
|
||||
<< "].node_from=" << _from
|
||||
<< ". Expected this to match the node_id (node_to=" << _to << ")";
|
||||
|
||||
if(agent_map.find(_to) == agent_map.end())
|
||||
{
|
||||
LOG(WARNING) << "no agent mapping for io_link[" << i << "].node_to=" << _to
|
||||
<< " in rocprofiler agent " << node_id;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto [_to_rocp_agent, _to_hsa_agent] = agent_map.at(_to);
|
||||
auto _distance = std::abs(static_cast<int32_t>(_from - _to));
|
||||
if(_distance > 0 && _distance < distance_min &&
|
||||
_to_rocp_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
{
|
||||
distance_min = _distance;
|
||||
_nearest_cpu = _to_hsa_agent;
|
||||
}
|
||||
}
|
||||
}
|
||||
return _nearest_cpu;
|
||||
};
|
||||
|
||||
auto is_duplicate = [](const auto* agent_v) {
|
||||
for(const auto& itr : get_agent_caches())
|
||||
{
|
||||
if(itr == agent_v) return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Generate supported agents
|
||||
for(const auto& itr : agent_map)
|
||||
{
|
||||
const auto* rocp_agent = std::get<0>(itr.second);
|
||||
auto hsa_agent = std::get<1>(itr.second);
|
||||
if(is_duplicate(rocp_agent)) continue;
|
||||
|
||||
// AgentCache is only for GPU agents
|
||||
if(rocp_agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
|
||||
auto _nearest_cpu = find_nearest_hsa_cpu_agent(itr.first);
|
||||
try
|
||||
{
|
||||
get_agent_caches().emplace_back(
|
||||
rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_);
|
||||
} catch(std::runtime_error& err)
|
||||
{
|
||||
if(rocp_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
LOG(ERROR) << fmt::format("rocprofiler agent <-> HSA agent mapping failed: {} ({})",
|
||||
rocp_agent->node_id,
|
||||
err.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<hsa_agent_t>
|
||||
get_hsa_agent(const rocprofiler_agent_t* agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
{
|
||||
if(itr == agent) return itr.get_hsa_agent();
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Looks up the rocprofiler agent paired with the given HSA agent. Returns nullptr when no
// agent cache entry matches `agent`.
const rocprofiler_agent_t*
get_rocprofiler_agent(hsa_agent_t agent)
{
    const auto& _caches = get_agent_caches();
    for(auto itr = _caches.begin(); itr != _caches.end(); ++itr)
    {
        if(*itr == agent) return &itr->get_rocp_agent();
    }

    return nullptr;
}
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
get_agent_cache(const rocprofiler_agent_t* agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
{
|
||||
if(itr == agent) return itr;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
get_agent_cache(hsa_agent_t agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
{
|
||||
if(itr == agent) return itr;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
} // namespace agent
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -585,15 +829,7 @@ rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI;
|
||||
}
|
||||
|
||||
// auto agents = get_agents();
|
||||
auto& agents = rocprofiler::agent::get_agent_topology();
|
||||
auto pointers = std::vector<const rocprofiler_agent_t*>{};
|
||||
pointers.reserve(agents.size());
|
||||
for(auto& agent : agents)
|
||||
{
|
||||
pointers.emplace_back(agent.get());
|
||||
}
|
||||
|
||||
auto&& pointers = rocprofiler::agent::get_agents();
|
||||
return callback(pointers.data(), pointers.size(), user_data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler/agent.h>
|
||||
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace agent
|
||||
{
|
||||
std::vector<const rocprofiler_agent_t*>
|
||||
get_agents();
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table);
|
||||
|
||||
std::optional<hsa_agent_t>
|
||||
get_hsa_agent(const rocprofiler_agent_t* agent);
|
||||
|
||||
const rocprofiler_agent_t*
|
||||
get_rocprofiler_agent(hsa_agent_t agent);
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
get_agent_cache(const rocprofiler_agent_t* agent);
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
get_agent_cache(hsa_agent_t agent);
|
||||
} // namespace agent
|
||||
} // namespace rocprofiler
|
||||
@@ -21,7 +21,7 @@ AQLPacketConstruct::AQLPacketConstruct(const hsa::AgentCache& agen
|
||||
// for the counter.
|
||||
for(const auto& x : metrics)
|
||||
{
|
||||
auto query_info = get_query_info(_agent.get_agent(), x);
|
||||
auto query_info = get_query_info(_agent.get_hsa_agent(), x);
|
||||
_metrics.emplace_back().metric = x;
|
||||
uint32_t event_id = std::atoi(x.event().c_str());
|
||||
for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index)
|
||||
@@ -32,7 +32,7 @@ AQLPacketConstruct::AQLPacketConstruct(const hsa::AgentCache& agen
|
||||
event_id});
|
||||
bool validate_event_result;
|
||||
LOG_IF(FATAL,
|
||||
hsa_ven_amd_aqlprofile_validate_event(_agent.get_agent(),
|
||||
hsa_ven_amd_aqlprofile_validate_event(_agent.get_hsa_agent(),
|
||||
&_metrics.back().instances.back(),
|
||||
&validate_event_result) !=
|
||||
HSA_STATUS_SUCCESS);
|
||||
@@ -58,7 +58,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const
|
||||
}
|
||||
|
||||
pkt.profile = hsa_ven_amd_aqlprofile_profile_t{
|
||||
_agent.get_agent(),
|
||||
_agent.get_hsa_agent(),
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM?
|
||||
_events.data(),
|
||||
static_cast<uint32_t>(_events.size()),
|
||||
@@ -69,7 +69,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const
|
||||
auto& profile = pkt.profile;
|
||||
|
||||
hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
||||
ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_agent(),
|
||||
ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_hsa_agent(),
|
||||
_agent.kernarg_pool(),
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
static_cast<void*>(&_access));
|
||||
@@ -79,7 +79,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const
|
||||
{
|
||||
throw std::runtime_error(
|
||||
fmt::format("Agent {} does not allow memory pool access for counter collection",
|
||||
_agent.get_agent().handle));
|
||||
_agent.get_hsa_agent().handle));
|
||||
}
|
||||
|
||||
auto throw_if_failed = [](auto status, auto& message) {
|
||||
@@ -113,7 +113,7 @@ AQLPacketConstruct::construct_packet(const AmdExtTable& ext) const
|
||||
else
|
||||
{
|
||||
CHECK(*mem_loc);
|
||||
hsa_agent_t agent = _agent.get_agent();
|
||||
hsa_agent_t agent = _agent.get_hsa_agent();
|
||||
// Memory is accessible by both the GPU and CPU, unlock the command buffer for
|
||||
// sharing.
|
||||
LOG_IF(FATAL,
|
||||
@@ -167,7 +167,8 @@ AQLPacketConstruct::can_collect()
|
||||
iter->second++;
|
||||
if(inserted)
|
||||
{
|
||||
max_allowed.emplace(block_pair, get_block_counters(_agent.get_agent(), instance));
|
||||
max_allowed.emplace(block_pair,
|
||||
get_block_counters(_agent.get_hsa_agent(), instance));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ TEST(aql_profile, construct_packets)
|
||||
auto agents = rocprofiler::hsa::get_queue_controller().get_supported_agents();
|
||||
for(const auto& [_, agent] : agents)
|
||||
{
|
||||
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_agent().handle);
|
||||
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle);
|
||||
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
|
||||
ASSERT_EQ(metrics.size(), 1);
|
||||
AQLPacketConstruct(agent, metrics);
|
||||
@@ -82,7 +82,7 @@ TEST(aql_profile, too_many_counters)
|
||||
|
||||
for(const auto& [_, agent] : agents)
|
||||
{
|
||||
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_agent().handle);
|
||||
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle);
|
||||
|
||||
auto metrics = rocprofiler::findDeviceMetrics(agent, {});
|
||||
EXPECT_THROW(
|
||||
|
||||
@@ -99,8 +99,6 @@ allocate_buffer()
|
||||
rocprofiler_status_t
|
||||
flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
{
|
||||
LOG(ERROR) << "flushing...";
|
||||
|
||||
if(buffer_id.handle >= get_buffers().size()) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
|
||||
|
||||
auto& buff = get_buffers().at(buffer_id.handle);
|
||||
@@ -116,7 +114,6 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
auto idx = buff->buffer_idx++;
|
||||
|
||||
auto _task = [buffer_id, idx]() {
|
||||
LOG(ERROR) << "executing task...";
|
||||
auto& buff_v = get_buffers().at(buffer_id.handle);
|
||||
auto& buff_internal_v = buff_v->get_internal_buffer(idx);
|
||||
|
||||
@@ -154,7 +151,6 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
|
||||
if(task_group)
|
||||
{
|
||||
LOG(ERROR) << "executing task...";
|
||||
task_group->exec(_task);
|
||||
if(wait) task_group->wait();
|
||||
}
|
||||
|
||||
@@ -81,7 +81,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent,
|
||||
*instance_count = std::max(size_t(1), *instance_count);
|
||||
continue;
|
||||
}
|
||||
auto query_info = rocprofiler::aql::get_query_info(maybe_agent->get_agent(), counter);
|
||||
auto query_info = rocprofiler::aql::get_query_info(maybe_agent->get_hsa_agent(), counter);
|
||||
*instance_count = std::max(static_cast<size_t>(query_info.instance_count), *instance_count);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,16 +23,13 @@
|
||||
#include <glog/logging.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
// For Pre-ROCm 6.0 releases
|
||||
#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900
|
||||
# define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
// This function checks to see if the provided
|
||||
@@ -118,7 +115,7 @@ init_gpu_pool(const AmdExtTable& api, rocprofiler::hsa::AgentCache& agent)
|
||||
std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*> params =
|
||||
std::make_pair(&api, &agent.gpu_pool());
|
||||
auto status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(agent.get_agent(), FindStandardPool, ¶ms);
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(agent.get_hsa_agent(), FindStandardPool, ¶ms);
|
||||
|
||||
if(status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
{
|
||||
@@ -132,82 +129,27 @@ namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
AgentCache::AgentCache(rocprofiler_agent_t agent_t,
|
||||
size_t index,
|
||||
const ::CoreApiTable& table,
|
||||
const AmdExtTable& ext)
|
||||
: _agent_t(agent_t)
|
||||
, _index(index)
|
||||
, _name(agent_t.name)
|
||||
AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
|
||||
hsa_agent_t hsa_agent,
|
||||
size_t index,
|
||||
hsa_agent_t nearest_cpu,
|
||||
const AmdExtTable& ext_table)
|
||||
: m_rocp_agent{rocp_agent}
|
||||
, m_index{index}
|
||||
, m_hsa_agent{hsa_agent}
|
||||
, m_nearest_cpu{nearest_cpu}
|
||||
, m_name{rocp_agent->name}
|
||||
{
|
||||
// Get HSA Agents
|
||||
std::vector<hsa_agent_t> agents;
|
||||
table.hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* data) {
|
||||
CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
|
||||
// In case HSA_AMD_AGENT_INFO_NEAREST_CPU is non-functional, default to original v1 behavior
|
||||
// of last CPU agent being nearest.
|
||||
std::optional<hsa_agent_t> last_cpu;
|
||||
|
||||
bool found = false;
|
||||
// Find the HSA agent that is represented by rocprofiler_agent_t
|
||||
for(const auto& agent : agents)
|
||||
{
|
||||
hsa_device_type_t type = HSA_DEVICE_TYPE_CPU;
|
||||
if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
throw std::runtime_error("hsa_agent_get_info failed to find device");
|
||||
}
|
||||
|
||||
if(type != HSA_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if(type == HSA_DEVICE_TYPE_CPU && !last_cpu) last_cpu = agent;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t node_id = 0;
|
||||
if(table.hsa_agent_get_info_fn(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &node_id) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
throw std::runtime_error("hsa_agent_get_info failed to find driver id");
|
||||
}
|
||||
|
||||
// Match rocprofiler_agent_t to hsa_agent for GPU agents
|
||||
if(_index != node_id) continue;
|
||||
|
||||
if(table.hsa_agent_get_info_fn(
|
||||
agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NEAREST_CPU),
|
||||
&_nearest_cpu) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
_nearest_cpu = last_cpu ? *last_cpu : hsa_agent_t{.handle = 0};
|
||||
}
|
||||
|
||||
found = true;
|
||||
_agent = agent;
|
||||
}
|
||||
|
||||
if(!found)
|
||||
{
|
||||
throw std::runtime_error(fmt::format("Could not find GPU id = {}", agent_t.id.handle));
|
||||
}
|
||||
|
||||
// Construct CPU/GPU pools
|
||||
|
||||
try
|
||||
{
|
||||
init_cpu_pool(ext, *this);
|
||||
init_gpu_pool(ext, *this);
|
||||
init_cpu_pool(ext_table, *this);
|
||||
init_gpu_pool(ext_table, *this);
|
||||
} catch(std::runtime_error& e)
|
||||
{
|
||||
LOG(WARNING) << fmt::format(
|
||||
"Buffer creation for Agent {} failed ({}), Some profiling options will be unavialable.",
|
||||
agent_t.id.handle,
|
||||
"Buffer creation for Agent {} failed ({}), Some profiling options will be unavailable.",
|
||||
rocp_agent->node_id,
|
||||
e.what());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,34 +53,59 @@ static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
|
||||
class AgentCache
|
||||
{
|
||||
public:
|
||||
AgentCache(rocprofiler_agent_t, size_t index, const ::CoreApiTable&, const AmdExtTable&);
|
||||
AgentCache(const rocprofiler_agent_t* rocp_agent,
|
||||
hsa_agent_t hsa_agent,
|
||||
size_t index,
|
||||
hsa_agent_t nearest_cpu,
|
||||
const AmdExtTable& ext_table);
|
||||
~AgentCache() = default;
|
||||
AgentCache(const AgentCache&) = default;
|
||||
AgentCache(AgentCache&&) noexcept = default;
|
||||
|
||||
AgentCache& operator=(const AgentCache&) = default;
|
||||
AgentCache& operator=(AgentCache&&) noexcept = default;
|
||||
|
||||
// Provides const and a non-const accessor functions.
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, cpu_pool, _cpu_pool);
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, kernarg_pool, _kernarg_pool);
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, gpu_pool, _gpu_pool);
|
||||
CONST_NONCONST_ACCESSOR(rocprofiler_agent_t, agent_t, _agent_t);
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, get_agent, _agent);
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, _nearest_cpu);
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, cpu_pool, m_cpu_pool);
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, kernarg_pool, m_kernarg_pool);
|
||||
CONST_NONCONST_ACCESSOR(hsa_amd_memory_pool_t, gpu_pool, m_gpu_pool);
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, get_hsa_agent, m_hsa_agent);
|
||||
CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, m_nearest_cpu);
|
||||
|
||||
const std::string& name() const { return _name; }
|
||||
const rocprofiler_agent_t& get_rocp_agent() const { return *m_rocp_agent; }
|
||||
std::string_view name() const { return m_name; }
|
||||
size_t index() const { return m_index; }
|
||||
|
||||
bool operator==(const rocprofiler_agent_t*) const;
|
||||
bool operator==(hsa_agent_t) const;
|
||||
|
||||
private:
|
||||
// Agent info
|
||||
rocprofiler_agent_t _agent_t;
|
||||
size_t _index{0}; // rocprofiler_agent index
|
||||
const rocprofiler_agent_t* m_rocp_agent = nullptr;
|
||||
size_t m_index{0}; // rocprofiler_agent index
|
||||
|
||||
// GPU Agent
|
||||
hsa_agent_t _agent{.handle = 0};
|
||||
hsa_agent_t _nearest_cpu{.handle = 0};
|
||||
hsa_agent_t m_hsa_agent{.handle = 0};
|
||||
hsa_agent_t m_nearest_cpu{.handle = 0};
|
||||
|
||||
// memory pools
|
||||
hsa_amd_memory_pool_t _cpu_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t _kernarg_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t _gpu_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t m_cpu_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t m_kernarg_pool{.handle = 0};
|
||||
hsa_amd_memory_pool_t m_gpu_pool{.handle = 0};
|
||||
|
||||
std::string _name;
|
||||
std::string_view m_name = {};
|
||||
};
|
||||
|
||||
inline bool
|
||||
AgentCache::operator==(const rocprofiler_agent_t* agent) const
|
||||
{
|
||||
return (agent == m_rocp_agent);
|
||||
}
|
||||
|
||||
inline bool
|
||||
AgentCache::operator==(hsa_agent_t agent) const
|
||||
{
|
||||
return (agent.handle == m_hsa_agent.handle);
|
||||
}
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -307,7 +307,7 @@ Queue::Queue(const AgentCache& agent,
|
||||
|
||||
{
|
||||
LOG_IF(FATAL,
|
||||
_ext_api.hsa_amd_queue_intercept_create_fn(_agent.get_agent(),
|
||||
_ext_api.hsa_amd_queue_intercept_create_fn(_agent.get_hsa_agent(),
|
||||
size,
|
||||
type,
|
||||
callback,
|
||||
|
||||
@@ -19,7 +19,11 @@
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler/agent.hpp"
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent_cache.hpp"
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
@@ -42,7 +46,7 @@ create_queue(hsa_agent_t agent,
|
||||
{
|
||||
for(const auto& [_, agent_info] : get_queue_controller().get_supported_agents())
|
||||
{
|
||||
if(agent_info.get_agent().handle == agent.handle)
|
||||
if(agent_info.get_hsa_agent().handle == agent.handle)
|
||||
{
|
||||
auto new_queue = std::make_unique<Queue>(agent_info,
|
||||
size,
|
||||
@@ -76,7 +80,7 @@ QueueController::add_queue(hsa_queue_t* id, std::unique_ptr<Queue> queue)
|
||||
CHECK(queue);
|
||||
_callback_cache.wlock([&](auto& callbacks) {
|
||||
_queues.wlock([&](auto& map) {
|
||||
const auto agent_id = queue->get_agent().agent_t().id.handle;
|
||||
const auto agent_id = queue->get_agent().get_rocp_agent().id.handle;
|
||||
map[id] = std::move(queue);
|
||||
for(const auto& [cbid, cb_tuple] : callbacks)
|
||||
{
|
||||
@@ -110,7 +114,7 @@ QueueController::add_callback(const rocprofiler_agent_t& agent,
|
||||
_queues.wlock([&](auto& map) {
|
||||
for(auto& [_, queue] : map)
|
||||
{
|
||||
if(queue->get_agent().agent_t().id.handle == agent.id.handle)
|
||||
if(queue->get_agent().get_rocp_agent().id.handle == agent.id.handle)
|
||||
{
|
||||
queue->register_callback(return_id, qcb, ccb);
|
||||
}
|
||||
@@ -140,31 +144,17 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
_core_table = core_table;
|
||||
_ext_table = ext_table;
|
||||
|
||||
auto agents = agent::get_agents();
|
||||
|
||||
// Generate supported agents
|
||||
rocprofiler_query_available_agents(
|
||||
[](const rocprofiler_agent_t** agents, size_t num_agents, void* user_data) {
|
||||
CHECK(user_data);
|
||||
QueueController& queue = *reinterpret_cast<QueueController*>(user_data);
|
||||
for(size_t i = 0; i < num_agents; i++)
|
||||
{
|
||||
const auto& agent = *agents[i];
|
||||
if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
try
|
||||
{
|
||||
queue.get_supported_agents().emplace(
|
||||
i, AgentCache{agent, i, queue.get_core_table(), queue.get_ext_table()});
|
||||
} catch(std::runtime_error& error)
|
||||
{
|
||||
LOG(ERROR) << fmt::format("GPU Agent Construction Failed (HSA queue will not "
|
||||
"be intercepted): {} ({})",
|
||||
agent.id.handle,
|
||||
error.what());
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
sizeof(rocprofiler_agent_t),
|
||||
this);
|
||||
for(const auto* itr : agents)
|
||||
{
|
||||
auto cached_agent = agent::get_agent_cache(itr);
|
||||
if(cached_agent && cached_agent->get_rocp_agent().type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
get_supported_agents().emplace(cached_agent->index(), *cached_agent);
|
||||
}
|
||||
}
|
||||
|
||||
auto enable_intercepter = false;
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
#include "lib/rocprofiler/agent.hpp"
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler/hsa/queue.hpp"
|
||||
@@ -541,6 +542,9 @@ rocprofiler_set_api_table(const char* name,
|
||||
<< " rocprofiler expected HSA library to pass 1 API table, not " << num_tables;
|
||||
|
||||
auto* hsa_api_table = static_cast<HsaApiTable*>(*tables);
|
||||
|
||||
// need to construct agent mappings before initializing the queue controller
|
||||
rocprofiler::agent::construct_agent_cache(hsa_api_table);
|
||||
rocprofiler::hsa::queue_controller_init(hsa_api_table);
|
||||
|
||||
// any internal modifications to the HsaApiTable need to be done before we make the
|
||||
|
||||
@@ -99,13 +99,17 @@ TEST(rocprofiler_lib, agent_abi)
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_pc_sampling_configs), 280) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 288) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, node_id), 296) << msg;
|
||||
EXPECT_EQ(offsetof(rocprofiler_agent_t, reserved0), 300) << msg;
|
||||
// Add test for offset of new field above this. Do NOT change any existing values!
|
||||
|
||||
constexpr auto expected_rocp_agent_size = 304;
|
||||
// If a new field is added, increase this value by the size of the new field(s)
|
||||
EXPECT_EQ(sizeof(rocprofiler_agent_t), 296)
|
||||
EXPECT_EQ(sizeof(rocprofiler_agent_t), expected_rocp_agent_size)
|
||||
<< "ABI break. If you added a new field, make sure that this is the only new check that "
|
||||
"failed. Please add a check for the new field at the offset and update this test to the "
|
||||
"new size";
|
||||
static_assert(sizeof(rocprofiler_agent_t) == expected_rocp_agent_size, "Update agent size!");
|
||||
}
|
||||
|
||||
TEST(rocprofiler_lib, agent)
|
||||
@@ -115,10 +119,18 @@ TEST(rocprofiler_lib, agent)
|
||||
auto info_ret = std::system("/usr/bin/rocminfo");
|
||||
EXPECT_EQ(info_ret, 0);
|
||||
|
||||
auto sys_ret = std::system(
|
||||
std::cout << "# Data from '/sys/class/kfd/kfd/topology/nodes': \n" << std::flush;
|
||||
auto sys_ret_kfd = std::system(
|
||||
"/bin/bash -c 'for i in $(find /sys/class/kfd/kfd/topology/nodes -maxdepth 2 -type f | "
|
||||
"grep properties | sort); do echo -e \"\n##### ${i} #####\n\"; cat ${i}; echo \"\"; done'");
|
||||
EXPECT_EQ(sys_ret, 0);
|
||||
EXPECT_EQ(sys_ret_kfd, 0);
|
||||
|
||||
std::cout << "# Data from '/sys/devices/virtual/kfd/kfd/topology/nodes': \n" << std::flush;
|
||||
auto sys_ret_virt =
|
||||
std::system("/bin/bash -c 'for i in $(find /sys/devices/virtual/kfd/kfd/topology/nodes "
|
||||
"-maxdepth 2 -type f | grep properties | sort); do echo -e \"\n##### ${i} "
|
||||
"#####\n\"; cat ${i}; echo \"\"; done'");
|
||||
EXPECT_EQ(sys_ret_virt, 0);
|
||||
|
||||
auto agents = std::vector<const rocprofiler_agent_t*>{};
|
||||
rocprofiler_available_agents_cb_t iterate_cb =
|
||||
@@ -133,6 +145,7 @@ TEST(rocprofiler_lib, agent)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
std::cout << "# querying available agents...\n" << std::flush;
|
||||
auto status =
|
||||
rocprofiler_query_available_agents(iterate_cb,
|
||||
sizeof(rocprofiler_agent_t),
|
||||
@@ -154,7 +167,7 @@ TEST(rocprofiler_lib, agent)
|
||||
agent->name,
|
||||
agent->model_name,
|
||||
agent->gfx_target_version,
|
||||
agent->id.handle,
|
||||
agent->node_id,
|
||||
agent->type == ROCPROFILER_AGENT_TYPE_CPU ? "CPU" : "GPU");
|
||||
|
||||
// std::cout << msg << std::endl;
|
||||
@@ -182,6 +195,9 @@ TEST(rocprofiler_lib, agent)
|
||||
EXPECT_EQ(std::string_view{agent->product_name},
|
||||
std::string_view{hsa_agent->device_mkt_name})
|
||||
<< msg;
|
||||
EXPECT_EQ(agent->node_id, hsa_agent->internal_node_id) << msg;
|
||||
EXPECT_EQ(agent->location_id, hsa_agent->bdf_id) << msg;
|
||||
EXPECT_EQ(agent->device_id, hsa_agent->chip_id) << msg;
|
||||
EXPECT_EQ(agent->simd_count, hsa_agent->compute_unit * hsa_agent->simds_per_cu) << msg;
|
||||
EXPECT_EQ(agent->cu_count, hsa_agent->compute_unit) << msg;
|
||||
EXPECT_EQ(agent->simd_per_cu, hsa_agent->simds_per_cu) << msg;
|
||||
|
||||
Ссылка в новой задаче
Block a user