Agent Implementation (#78)

* Agent Implementation

* Remove unused Findrocprofiler

* Update lib/rocprofiler/hsa/agent.{hpp,cpp}

- default AgentInfo ctor
- getNumaNode() const
- noexcept move ctors
- default initializers for member variables
- fixed clang-tidy recommendations
  - preallocate
  - static in anon namespace
- AgentInfo::setName uses strncpy and ensures that it is terminated

* Update lib/rocprofiler/rocprofiler.cpp (agent.cpp and pc_sampling.cpp)

- move public PC sampling function implementations to pc_sampling.cpp
- move public agent function implementation to agent.cpp
This commit is contained in:
Jonathan R. Madsen
2023-09-22 13:51:21 -05:00
zatwierdzone przez GitHub
rodzic 5c07deb159
commit 6fb9000fa1
13 zmienionych plików z 858 dodań i 173 usunięć
-97
Wyświetl plik
@@ -1,97 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
# Copyright.txt or https://cmake.org/licensing for details.
include(FindPackageHandleStandardArgs)
# ----------------------------------------------------------------------------------------#
# Fall back to the ROCM_PATH environment variable when no CMake variable was given.
if(NOT ROCM_PATH)
    if(NOT "$ENV{ROCM_PATH}" STREQUAL "")
        set(ROCM_PATH "$ENV{ROCM_PATH}")
    endif()
endif()

# Gather every candidate install prefix that exists on disk (symlinks resolved).
# The list is consumed by the find_path()/find_library() calls that follow.
foreach(_DIR ${rocm_version_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler)
    if(NOT EXISTS ${_DIR})
        continue()
    endif()
    get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
    list(APPEND _ROCM_ROCPROFILER_PATHS ${_ABS_DIR})
endforeach()
# ----------------------------------------------------------------------------------------#
# Locate the installation root: a directory that contains either
# include/rocprofiler/rocprofiler.h or include/rocprofiler.h. Each candidate prefix
# is also tried with a trailing "rocprofiler" sub-directory.
find_path(
rocprofiler_ROOT_DIR
NAMES include/rocprofiler/rocprofiler.h include/rocprofiler.h
HINTS ${_ROCM_ROCPROFILER_PATHS}
PATHS ${_ROCM_ROCPROFILER_PATHS}
PATH_SUFFIXES rocprofiler)
mark_as_advanced(rocprofiler_ROOT_DIR)
# ----------------------------------------------------------------------------------------#
# Directory that directly contains rocprofiler.h; the root found above is searched
# before the generic candidate prefixes.
find_path(
rocprofiler_INCLUDE_DIR
NAMES rocprofiler.h
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATH_SUFFIXES include include/rocprofiler rocprofiler/include)
mark_as_advanced(rocprofiler_INCLUDE_DIR)
# Directory that directly contains hsa.h (either include/ or include/hsa/).
find_path(
rocprofiler_hsa_INCLUDE_DIR
NAMES hsa.h
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATH_SUFFIXES include include/hsa)
mark_as_advanced(rocprofiler_hsa_INCLUDE_DIR)
# ----------------------------------------------------------------------------------------#
# The rocprofiler library itself. NO_DEFAULT_PATH restricts the search to the
# HINTS/PATHS given here, so system default locations are not consulted.
find_library(
rocprofiler_LIBRARY
NAMES rocprofiler64 rocprofiler
HINTS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR}
${_ROCM_ROCPROFILER_PATHS}
PATHS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR}
${_ROCM_ROCPROFILER_PATHS}
PATH_SUFFIXES lib lib64
NO_DEFAULT_PATH)
# The HSA runtime library. Unlike the call above, default search paths are still
# consulted because NO_DEFAULT_PATH is not passed.
find_library(
rocprofiler_hsa-runtime_LIBRARY
NAMES hsa-runtime64 hsa-runtime
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
PATH_SUFFIXES lib lib64)
# Record the directory holding the rocprofiler library as a cache entry.
if(rocprofiler_LIBRARY)
get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" PATH CACHE)
endif()
mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)
# The candidate-prefix list is a temporary; do not leak it to the including scope.
unset(_ROCM_ROCPROFILER_PATHS)
# ----------------------------------------------------------------------------------------#
# Sets rocprofiler_FOUND (and reports a standard message) only when every variable
# listed below was resolved by the find_path()/find_library() calls above.
find_package_handle_standard_args(
    rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR
    rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)
# ----------------------------------------------------------------------------------------#
if(rocprofiler_FOUND)
    # Classic result variables for consumers that do not use imported targets.
    set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR}
                                 ${rocprofiler_hsa_INCLUDE_DIR})
    set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY})
    set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR})

    # Guard target creation: add_library() fails if the imported target already
    # exists, which happens when find_package(rocprofiler) runs more than once in
    # the same directory scope.
    if(NOT TARGET rocprofiler::rocprofiler)
        add_library(rocprofiler::rocprofiler INTERFACE IMPORTED)
        target_include_directories(
            rocprofiler::rocprofiler INTERFACE ${rocprofiler_INCLUDE_DIR}
                                               ${rocprofiler_hsa_INCLUDE_DIR})
        target_link_libraries(rocprofiler::rocprofiler
                              INTERFACE ${rocprofiler_LIBRARIES})
    endif()

    # NOTE(review): rocprofiler::roctx is created without any usage requirements;
    # it is kept as an empty placeholder target exactly as before.
    if(NOT TARGET rocprofiler::roctx)
        add_library(rocprofiler::roctx INTERFACE IMPORTED)
    endif()
endif()
+2 -1
Wyświetl plik
@@ -120,7 +120,8 @@ endif()
rocprofiler_target_compile_options(
rocprofiler-developer-flags
LANGUAGES C CXX
INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra")
INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra"
"-Wno-missing-field-initializers")
if(ROCPROFILER_BUILD_DEVELOPER)
target_link_libraries(rocprofiler-build-flags
@@ -154,3 +154,17 @@ target_link_libraries(rocprofiler-amd-comgr INTERFACE amd_comgr)
# ----------------------------------------------------------------------------------------#
target_link_libraries(rocprofiler-ptl INTERFACE PTL::ptl-static)
# ----------------------------------------------------------------------------------------#
#
# amd aql
#
# ----------------------------------------------------------------------------------------#
# Locate the AQL profile library. HINTS/PATHS entries are install prefixes, and
# find_library() only searches the listed directories themselves (plus
# PATH_SUFFIXES), so the lib/lib64 sub-directories must be named explicitly.
find_library(
    hsa-amd-aqlprofile64_library
    NAMES hsa-amd-aqlprofile64 hsa-amd-aqlprofile
    HINTS ${rocm_version_DIR} ${ROCM_PATH}
    PATHS ${rocm_version_DIR} ${ROCM_PATH}
    PATH_SUFFIXES lib lib64)

# Fail at configure time with an actionable message instead of letting a
# "-NOTFOUND" value reach target_link_libraries() and fail later at generate time.
if(NOT hsa-amd-aqlprofile64_library)
    message(
        FATAL_ERROR
            "Could not find hsa-amd-aqlprofile64 (searched: ${rocm_version_DIR} ${ROCM_PATH}). "
            "Set ROCM_PATH or CMAKE_PREFIX_PATH to the ROCm installation prefix.")
endif()

target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library})
+1
Wyświetl plik
@@ -50,3 +50,4 @@ rocprofiler_add_interface_library(rocprofiler-glog "Google Log library" INTERNAL
# Internal interface libraries; each one carries the usage requirements of a
# single third-party dependency.
rocprofiler_add_interface_library(
    rocprofiler-fmt
    "C++ format string library"
    INTERNAL)
rocprofiler_add_interface_library(
    rocprofiler-stdcxxfs
    "C++ filesystem library"
    INTERNAL)
rocprofiler_add_interface_library(
    rocprofiler-ptl
    "Parallel Tasking Library"
    INTERNAL)
rocprofiler_add_interface_library(
    rocprofiler-hsa-aql
    "AQL library"
    INTERNAL)
+2 -1
Wyświetl plik
@@ -29,6 +29,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hip>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-amd-comgr>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-runtime>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>)
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>)
set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME
rocprofiler-common)
+16
Wyświetl plik
@@ -45,5 +45,21 @@ timestamp_ns()
// TODO(jrmadsen): this should be updated to the HSA method
return std::chrono::steady_clock::now().time_since_epoch().count();
}
/// Looks up @p key in a read-only associative container.
///
/// @return pointer to the mapped value when the key is present, otherwise nullptr.
template <class Container, typename Key = typename Container::key_type>
const auto*
get_val(const Container& map, const Key& key)
{
    const auto itr = map.find(key);
    return (itr == map.end()) ? nullptr : &itr->second;
}
/// Looks up @p key in a mutable associative container.
///
/// @return pointer to the (modifiable) mapped value when the key is present,
///         otherwise nullptr.
template <class Container, typename Key = typename Container::key_type>
auto*
get_val(Container& map, const Key& key)
{
    auto itr = map.find(key);
    return (itr == map.end()) ? nullptr : &itr->second;
}
} // namespace common
} // namespace rocprofiler
+9 -2
Wyświetl plik
@@ -5,8 +5,15 @@ rocprofiler_activate_clang_tidy()
set(ROCPROFILER_LIB_HEADERS buffer.hpp internal_threading.hpp registration.hpp)
set(ROCPROFILER_LIB_SOURCES
buffer.cpp buffer_tracing.cpp callback_tracing.cpp context.cpp internal_threading.cpp
rocprofiler.cpp registration.cpp)
agent.cpp
buffer.cpp
buffer_tracing.cpp
callback_tracing.cpp
context.cpp
internal_threading.cpp
pc_sampling.cpp
rocprofiler.cpp
registration.cpp)
# ----------------------------------------------------------------------------------------#
#
+78
Wyświetl plik
@@ -0,0 +1,78 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/agent.h>
#include <rocprofiler/fwd.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/rocprofiler/hsa/agent.hpp"
#include <vector>
extern "C" {
/// Reports every agent returned by rocprofiler::hsa::all_agents() to @p callback.
///
/// @param callback   invoked once with an array of agent pointers, its length and
///                   @p user_data; its return value is returned to the caller.
/// @param agent_size size of rocprofiler_agent_t as seen by the caller; only
///                   validated by an assertion (debug builds).
/// @param user_data  opaque pointer forwarded to @p callback.
///
/// The agent records and their PC sampling configuration arrays are local to this
/// call, so the pointers handed to the callback are only valid while it runs.
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
                                   size_t                            agent_size,
                                   void*                             user_data)
{
    using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;

    static const auto _default_pc_config =
        rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
                                                ROCPROFILER_PC_SAMPLING_UNIT_TIME,
                                                1UL,
                                                1000000000UL,
                                                0};

    const auto& agent_info = rocprofiler::hsa::all_agents();

    // one configuration vector per agent; these own the storage referenced by the
    // rocprofiler_pc_sampling_config_array_t of the matching agent record
    auto pc_sampling_configs = std::vector<pc_sampling_config_vec_t>{};
    auto agents              = std::vector<rocprofiler_agent_t>{};
    pc_sampling_configs.reserve(agent_info.size());
    agents.reserve(agent_info.size());

    for(const auto& agent : agent_info)
    {
        auto& _data = pc_sampling_configs.emplace_back();
        // only GPU agents advertise the default PC sampling configuration
        if(agent.isGpu()) _data = {_default_pc_config};

        agents.emplace_back(rocprofiler_agent_t{
            .id   = rocprofiler_agent_id_t{.handle = agents.size()},
            .type = (agent.isCpu() ? ROCPROFILER_AGENT_TYPE_CPU
                                   : (agent.isGpu() ? ROCPROFILER_AGENT_TYPE_GPU
                                                    : ROCPROFILER_AGENT_TYPE_NONE)),
            .name = agent.getNameChar(),
            .pc_sampling_configs =
                rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()}});
    }

    auto pointers = std::vector<rocprofiler_agent_t*>{};
    pointers.reserve(agents.size());
    for(auto& agent : agents)
    {
        pointers.emplace_back(&agent);
    }

    // agent_size is otherwise unused when assert() is compiled out (NDEBUG), which
    // would trip -Wunused-parameter under -Werror
    (void) agent_size;
    assert(agent_size <= sizeof(rocprofiler_agent_t) &&
           "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in "
           "rocprofiler");

    return callback(pointers.data(), pointers.size(), user_data);
}
}
@@ -1,9 +1,5 @@
#
#
#
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp)
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp)
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp agent.cpp)
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp agent.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
${ROCPROFILER_LIB_HSA_HEADERS})
+499
Wyświetl plik
@@ -0,0 +1,499 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "agent.hpp"
#include <glog/logging.h>
#include <filesystem>
#include <fstream>
#include "lib/common/utility.hpp"
namespace fs = std::filesystem;
namespace rocprofiler
{
namespace hsa
{
namespace
{
std::unordered_map<long long, long long>
get_gpu_nodes_near_cpu()
{
std::unordered_map<long long, long long> gpu_numa_nodes_near_cpu;
long long gpu_numa_nodes_start = 0;
std::string path = "/sys/class/kfd/kfd/topology/nodes";
for(const auto& entry : fs::directory_iterator(path))
{
long long node_id = std::stoll(entry.path().filename().c_str());
std::ifstream gpu_id_file;
std::string gpu_path = entry.path().c_str();
gpu_path += "/gpu_id";
gpu_id_file.open(gpu_path);
std::string gpu_id_str;
if(gpu_id_file.is_open())
{
gpu_id_file >> gpu_id_str;
if(!gpu_id_str.empty())
{
auto gpu_id = std::stoll(gpu_id_str);
if(gpu_id > 0 && (gpu_numa_nodes_start > node_id || gpu_numa_nodes_start == 0))
{
gpu_numa_nodes_start = node_id;
}
}
}
gpu_id_file.close();
}
path = "/sys/class/kfd/kfd/topology/nodes";
for(const auto& entry : fs::directory_iterator(path))
{
long long node_id = std::stoll(entry.path().filename().c_str());
std::string numa_node_path = entry.path().c_str();
long long agent_id = std::stoll(entry.path().filename().c_str());
if(agent_id >= gpu_numa_nodes_start)
{
numa_node_path += "/io_links";
for(const auto& numa_node_entry : fs::directory_iterator(numa_node_path))
{
std::string numa_node_entry_properties_path = numa_node_entry.path().c_str();
numa_node_entry_properties_path += "/properties";
std::ifstream gpu_properties_file;
gpu_properties_file.open(numa_node_entry_properties_path);
std::string gpu_properties_file_line;
if(gpu_properties_file.is_open())
{
while(gpu_properties_file)
{
std::getline(gpu_properties_file, gpu_properties_file_line);
std::string delimiter = " ";
std::stringstream ss(gpu_properties_file_line);
std::string word;
ss >> word;
if(word == "node_to")
{
ss >> word;
long long near_cpu_node_id = std::stoll(word);
if(near_cpu_node_id < gpu_numa_nodes_start)
{
gpu_numa_nodes_near_cpu[node_id] = near_cpu_node_id;
}
}
}
}
gpu_properties_file.close();
}
}
}
return gpu_numa_nodes_near_cpu;
}
// Checks whether the provided pool has the HSA_AMD_SEGMENT_GLOBAL property. If the
// kern_arg flag is true, the pool must additionally have the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
// the pool must NOT have this property.
//
// `data` must point to a std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>:
// the API table used for the queries and the location that receives the pool.
//
// Returns HSA_STATUS_INFO_BREAK after storing a pool that meets these conditions,
// HSA_STATUS_SUCCESS if this pool does not match (iteration should continue) and
// HSA_STATUS_ERROR_INVALID_ARGUMENT if `data` is null. Any failure to query the
// pool is fatal (LOG(FATAL)).
hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg)
{
    if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    auto [api_ptr, pool_ptr] =
        *static_cast<std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>*>(data);

    // Treat every non-success status as an error: checking only for the generic
    // HSA_STATUS_ERROR would let other error codes through with `segment` unset.
    hsa_amd_segment_t segment{};
    LOG_IF(FATAL,
           api_ptr->hsa_amd_memory_pool_get_info_fn(
               pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) != HSA_STATUS_SUCCESS)
        << "Could not get pool segment";
    if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;

    uint32_t flag = 0;
    LOG_IF(FATAL,
           api_ptr->hsa_amd_memory_pool_get_info_fn(
               pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) != HSA_STATUS_SUCCESS)
        << "Could not get flag value";

    // the pool matches only when "has KERNARG_INIT" agrees with what was requested
    uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
    if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg))
    {
        return HSA_STATUS_SUCCESS;
    }

    *(pool_ptr) = pool;
    return HSA_STATUS_INFO_BREAK;
}
// Callback for hsa_amd_agent_iterate_memory_pools(): selects a pool that is
// HSA_AMD_SEGMENT_GLOBAL and does NOT carry
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
hsa_status_t
FindStandardPool(hsa_amd_memory_pool_t pool, void* data)
{
    constexpr bool require_kernarg = false;
    return FindGlobalPool(pool, data, require_kernarg);
}
// Callback for hsa_amd_agent_iterate_memory_pools(): selects a pool that is
// HSA_AMD_SEGMENT_GLOBAL and DOES carry
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
hsa_status_t
FindKernArgPool(hsa_amd_memory_pool_t pool, void* data)
{
    constexpr bool require_kernarg = true;
    return FindGlobalPool(pool, data, require_kernarg);
}
void
init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent)
{
CHECK(!cpu_agent.isGpu());
auto params = std::make_pair(&api, &cpu_agent.cpu_pool);
auto status =
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, &params);
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: Command Buffer Pool is not initialized";
params.second = &cpu_agent.kernarg_pool;
status =
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params));
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: Output Buffer Pool is not initialized";
}
void
init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info)
{
CHECK(agent_info.isGpu());
auto params = std::make_pair(&api, &agent_info.gpu_pool);
auto status =
api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, &params);
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: GPU Pool is not initialized";
}
} // namespace
const std::vector<AgentInfo>&
all_agents()
{
static std::shared_ptr<const std::vector<AgentInfo>> agents = AgentInfo::getAgents(
{.hsa_iterate_agents_fn = hsa_iterate_agents, .hsa_agent_get_info_fn = hsa_agent_get_info},
{.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info,
.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools,
.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate,
.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free,
.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access});
return *agents;
}
// Enumerates all HSA agents through `api` and returns an AgentInfo for each.
//
// Non-GPU agents get their NUMA node and their CPU/kernarg memory pools; GPU
// agents get their driver node id (as index), NUMA node and GPU memory pool. In a
// second pass every GPU agent is linked to its nearest CPU agent (per the KFD
// topology) and inherits that agent's cpu_pool and kernarg_pool.
//
// Any failed HSA query, or a GPU without a resolvable near CPU agent, is fatal.
std::shared_ptr<const std::vector<AgentInfo>>
AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api)
{
    std::vector<hsa_agent_t>                agents;
    std::shared_ptr<std::vector<AgentInfo>> agent_info_ptr =
        std::make_shared<std::vector<AgentInfo>>();
    auto& agent_info = *agent_info_ptr;

    api.hsa_iterate_agents_fn(
        [](hsa_agent_t agent, void* data) {
            CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
            return HSA_STATUS_SUCCESS;
        },
        &agents);

    auto near_gpu_map = get_gpu_nodes_near_cpu();

    std::unordered_map<int64_t, AgentInfo*> cpu_id_to_agent;
    // Reserve is required to prevent reallocation (which breaks cpu_id_to_agent)
    agent_info.reserve(agents.size());

    for(auto& agent : agents)
    {
        auto& new_agent = agent_info.emplace_back(agent, api);
        if(!new_agent.isGpu())
        {
            uint32_t cpu_numa_node_id;
            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) !=
                       HSA_STATUS_SUCCESS)
                << "Could not fetch numa info";
            new_agent.setNumaNode(cpu_numa_node_id);
            cpu_id_to_agent[cpu_numa_node_id] = &new_agent;
            init_cpu_pool(ext_api, new_agent);
        }
        else
        {
            uint32_t node_id;
            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(
                       agent,
                       static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
                       &node_id) != HSA_STATUS_SUCCESS)
                << "Could not fetch driver node id";
            new_agent.setIndex(node_id);

            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(agent,
                                             static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NODE),
                                             &node_id) != HSA_STATUS_SUCCESS)
                << "Could not fetch driver node id";
            new_agent.setNumaNode(node_id);
            init_gpu_pool(ext_api, new_agent);
        }
    }

    // Separate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the
    // first NUMA nodes)
    for(auto& agent : agent_info)
    {
        if(!agent.isGpu()) continue;

        auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode());
        // the message has two placeholders: the NUMA node that was looked up and the agent
        LOG_IF(FATAL, !near_gpu) << fmt::format(
            "No CPU Agent near GPU Agent: {} {}", agent.getNumaNode(), agent);

        auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu);
        LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu);

        agent.setNearCpuAgent((*id_to_agent)->getAgent());
        agent.cpu_pool     = (*id_to_agent)->cpu_pool;
        agent.kernarg_pool = (*id_to_agent)->kernarg_pool;
    }

    return agent_info_ptr;
}
// Queries the HSA runtime (through `table`) for the properties of `agent`.
//
// The device type and name are read for every agent; gfxip_ is the name with its
// last two characters removed. The remaining hardware properties are only
// queried for GPU agents. Failing to read the device type, the shader
// array / waves-per-CU configuration or the PCI information is fatal.
AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table)
: handle_(agent.handle)
, agent_(agent)
{
    if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info failed";
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_);

    // Guard the subtraction: for a name shorter than two characters the unsigned
    // `strlen(name_) - 2` would wrap around and copy stale bytes into gfxip_.
    const size_t name_len        = strlen(name_);
    const size_t trimmed_len     = (name_len >= 2) ? (name_len - 2) : size_t{0};
    const size_t gfxip_label_len = std::min(trimmed_len, sizeof(gfxip_) - 1);
    memcpy(gfxip_, name_, gfxip_label_len);
    gfxip_[gfxip_label_len] = '\0';

    if(type_ != HSA_DEVICE_TYPE_GPU)
    {
        return;
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);

    if(table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
           &shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
           &waves_per_cu_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed";
    }

    // Several of the queries above are unchecked, so a divisor may still be zero;
    // in that case the derived value keeps its default instead of dividing by zero.
    const uint32_t total_shader_arrays = se_num_ * shader_arrays_per_se_;
    if(total_shader_arrays != 0) compute_units_per_sh_ = cu_num_ / total_shader_arrays;
    if(simds_per_cu_ != 0) wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;

    if(table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN),
                                   &pci_domain_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID),
                                   &pci_location_id_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for PCI info failed";
    }
}
// Returns index_ (getAgents() sets it to the driver node id for GPU agents).
uint64_t
AgentInfo::getIndex() const
{
return index_;
}
// Returns the HSA device type read in the constructor.
hsa_device_type_t
AgentInfo::getType() const
{
return type_;
}
// Returns the raw handle value of the underlying hsa_agent_t.
uint64_t
AgentInfo::getHandle() const
{
return handle_;
}
// Returns a view of the agent name; the view refers to storage owned by this object.
const std::string_view
AgentInfo::getName() const
{
return name_;
}
// Returns a copy of the gfxip label (the agent name without its last two characters).
std::string
AgentInfo::getGfxip() const
{
return std::string(gfxip_);
}
// Value of HSA_AGENT_INFO_WAVEFRONT_SIZE (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getMaxWaveSize() const
{
return max_wave_size_;
}
// Value of HSA_AGENT_INFO_QUEUE_MAX_SIZE (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getMaxQueueSize() const
{
return max_queue_size_;
}
// Value of HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getCUCount() const
{
return cu_num_;
}
// Value of HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getSimdCountPerCU() const
{
return simds_per_cu_;
}
// Value of HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getShaderEngineCount() const
{
return se_num_;
}
// Value of HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getShaderArraysPerSE() const
{
return shader_arrays_per_se_;
}
// Value of HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getMaxWavesPerCU() const
{
return waves_per_cu_;
}
// Derived in the constructor: CU count / (shader engines * shader arrays per SE).
uint32_t
AgentInfo::getCUCountPerSH() const
{
return compute_units_per_sh_;
}
// Derived in the constructor: max waves per CU / SIMDs per CU.
uint32_t
AgentInfo::getWaveSlotsPerSimd() const
{
return wave_slots_per_simd_;
}
// Value of HSA_AMD_AGENT_INFO_DOMAIN (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getPCIDomain() const
{
return pci_domain_;
}
// Value of HSA_AMD_AGENT_INFO_BDFID (GPU agents only; 0 otherwise).
uint32_t
AgentInfo::getPCILocationID() const
{
return pci_location_id_;
}
// Returns xcc_num_.
// NOTE(review): nothing visible in this file assigns xcc_num_, so this appears to
// always return the default of 0 - confirm whether a query is missing.
uint32_t
AgentInfo::getXccCount() const
{
return xcc_num_;
}
// Overrides the index reported by getIndex().
void
AgentInfo::setIndex(uint64_t index)
{
index_ = index;
}
// Overrides the device type; this changes what isGpu()/isCpu() report.
void
AgentInfo::setType(hsa_device_type_t type)
{
type_ = type;
}
// Overrides the value reported by getHandle(). The hsa_agent_t returned by
// getAgent() is not modified.
void
AgentInfo::setHandle(uint64_t handle)
{
handle_ = handle;
}
// Replaces the agent name. Names longer than the internal buffer are truncated;
// the stored name is always NUL-terminated. The gfxip label is not updated.
void
AgentInfo::setName(const std::string& name)
{
    constexpr auto name_len = sizeof(name_) / sizeof(char);
    //
    //  char* strncpy(char* destination, const char* source, size_t num)
    //
    // If the end of the source string (which is signaled by a null-character) is found before num
    // characters have been copied, destination is padded with zeros until a total of num characters
    // have been written to it.
    //
    // Copy up to name_len - 1 characters so that every byte in front of the terminator is
    // (re)written; copying only name_len - 2 would leave name_[name_len - 2] holding a stale
    // character from the previous name whenever the new name is that long.
    strncpy(name_, name.c_str(), name_len - 1);
    // ensure always terminated
    name_[name_len - 1] = '\0';
}
// Stores the NUMA node id of this agent (getAgents() passes HSA_AGENT_INFO_NODE).
void
AgentInfo::setNumaNode(uint32_t numa_node)
{
numa_node_ = numa_node;
}
// Returns the NUMA node id stored by setNumaNode() (0 if never set).
uint32_t
AgentInfo::getNumaNode() const
{
return numa_node_;
}
// Stores the CPU agent considered nearest to this agent.
void
AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent)
{
near_cpu_agent_ = near_cpu_agent;
}
// Returns the agent stored by setNearCpuAgent() (value-initialized if never set).
hsa_agent_t
AgentInfo::getNearCpuAgent()
{
return near_cpu_agent_;
}
} // namespace hsa
} // namespace rocprofiler
+174
Wyświetl plik
@@ -0,0 +1,174 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include "fmt/core.h"
#include "fmt/ranges.h"
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "lib/common/utility.hpp"
namespace rocprofiler
{
namespace hsa
{
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
// Snapshot of the properties of a single HSA agent (CPU, GPU or DSP) together
// with the memory pools associated with it. Instances are movable but not
// copyable.
class AgentInfo
{
public:
    AgentInfo() = default;
    // Queries the properties of `agent` through the function pointers in `table`.
    AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table);

    uint64_t               getIndex() const;
    hsa_device_type_t      getType() const;
    bool                   isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; }
    bool                   isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; }
    uint64_t               getHandle() const;
    const std::string_view getName() const;
    // NUL-terminated agent name; the pointer refers to storage owned by this object
    const char* getNameChar() const { return name_; }
    std::string getGfxip() const;
    uint32_t    getMaxWaveSize() const;
    uint32_t    getMaxQueueSize() const;
    uint32_t    getCUCount() const;
    uint32_t    getSimdCountPerCU() const;
    uint32_t    getShaderEngineCount() const;
    uint32_t    getShaderArraysPerSE() const;
    uint32_t    getMaxWavesPerCU() const;
    uint32_t    getCUCountPerSH() const;
    uint32_t    getWaveSlotsPerSimd() const;
    uint32_t    getPCIDomain() const;
    uint32_t    getPCILocationID() const;
    uint32_t    getXccCount() const;

    void setIndex(uint64_t index);
    void setType(hsa_device_type_t type);
    void setHandle(uint64_t handle);
    void setName(const std::string& name);

    void     setNumaNode(uint32_t numa_node);
    uint32_t getNumaNode() const;

    void        setNearCpuAgent(hsa_agent_t near_cpu_agent);
    hsa_agent_t getNearCpuAgent();

    hsa_agent_t getAgent() const { return agent_; }

    // Memory pools associated with this agent. Value-initialized so that reading
    // a pool that was never assigned yields a zero handle rather than an
    // indeterminate value.
    hsa_amd_memory_pool_t cpu_pool     = {};
    hsa_amd_memory_pool_t kernarg_pool = {};
    hsa_amd_memory_pool_t gpu_pool     = {};

    // Enumerates every agent reported by the HSA runtime; see the definition for details.
    static std::shared_ptr<const std::vector<AgentInfo>> getAgents(const CoreApiTable&,
                                                                   const AmdExtTable&);

    // Keep move constructors (i.e. std::move())
    AgentInfo(AgentInfo&& other) noexcept = default;
    AgentInfo& operator=(AgentInfo&& other) noexcept = default;

    // Do not allow copying this class
    AgentInfo(const AgentInfo&) = delete;
    AgentInfo& operator=(const AgentInfo&) = delete;

private:
    uint64_t          index_ = 0;
    hsa_device_type_t type_  = HSA_DEVICE_TYPE_CPU;  // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
    uint64_t          handle_                = 0;
    char              name_[64]              = {'\0'};
    char              gfxip_[64]             = {'\0'};
    uint32_t          max_wave_size_         = 0;
    uint32_t          max_queue_size_        = 0;
    uint32_t          cu_num_                = 0;
    uint32_t          simds_per_cu_          = 0;
    uint32_t          se_num_                = 0;
    uint32_t          shader_arrays_per_se_  = 0;
    uint32_t          waves_per_cu_          = 0;
    // CUs per SH/SA
    uint32_t compute_units_per_sh_ = 0;
    uint32_t wave_slots_per_simd_  = 0;
    // Number of XCCs on the GPU
    uint32_t xcc_num_ = 0;

    uint32_t    pci_domain_      = 0;
    uint32_t    pci_location_id_ = 0;
    uint32_t    numa_node_       = 0;
    hsa_agent_t near_cpu_agent_  = {};
    hsa_agent_t agent_           = {};
};
// Returns the list of all agents known to the HSA runtime.
const std::vector<AgentInfo>&
all_agents();
} // namespace hsa
} // namespace rocprofiler
namespace fmt
{
// Formats an AgentInfo as a single-line JSON object in which every value is
// emitted as a string. No format specifiers are supported.
template <>
struct formatter<rocprofiler::hsa::AgentInfo>
{
    template <typename ParseContext>
    constexpr auto parse(ParseContext& ctx)
    {
        return ctx.begin();
    }

    template <typename Ctx>
    auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const
    {
        // human-readable label for the HSA device type
        std::string_view type_label = "UNKNOWN";
        switch(agent.getType())
        {
            case HSA_DEVICE_TYPE_CPU: type_label = "CPU"; break;
            case HSA_DEVICE_TYPE_GPU: type_label = "GPU"; break;
            case HSA_DEVICE_TYPE_DSP: type_label = "DSP"; break;
            default: break;
        }

        return fmt::format_to(
            ctx.out(),
            R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})",
            agent.getIndex(),
            type_label,
            agent.getHandle(),
            agent.getName(),
            agent.getGfxip(),
            agent.getMaxWaveSize(),
            agent.getMaxQueueSize(),
            agent.getCUCount(),
            agent.getSimdCountPerCU(),
            agent.getShaderEngineCount(),
            agent.getShaderArraysPerSE(),
            agent.getMaxWavesPerCU(),
            agent.getCUCountPerSH(),
            agent.getWaveSlotsPerSimd(),
            agent.getPCIDomain(),
            agent.getPCILocationID(),
            agent.getXccCount());
    }
};
}  // namespace fmt
@@ -0,0 +1,60 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/fwd.h>
#include <rocprofiler/pc_sampling.h>
#include "lib/rocprofiler/registration.hpp"
namespace
{
// Accepts any number of arguments and does nothing with them; used to mark
// parameters of not-yet-implemented functions as used.
template <typename... Tp>
void
consume_args(Tp&&...)
{}
} // namespace
extern "C" {
/// Placeholder for configuring the PC sampling service.
///
/// @return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED when the registration
///         init status is greater than zero, otherwise
///         ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED. No argument is used yet.
rocprofiler_status_t
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t         context_id,
                                          rocprofiler_agent_t              agent,
                                          rocprofiler_pc_sampling_method_t method,
                                          rocprofiler_pc_sampling_unit_t   unit,
                                          uint64_t                         interval,
                                          rocprofiler_buffer_id_t          buffer_id)
{
    // the arguments are intentionally ignored until the service is implemented
    consume_args(context_id, agent, method, unit, interval, buffer_id);

    const bool configuration_locked = (rocprofiler::registration::get_init_status() > 0);
    return configuration_locked ? ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED
                                : ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
/// Placeholder for querying the PC sampling configurations of an agent.
///
/// @return always ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; no argument is read
///         or written.
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t                      agent,
                                                   rocprofiler_pc_sampling_configuration_t* config,
                                                   size_t* config_count)
{
    // the arguments are intentionally ignored until the query is implemented
    (void) agent;
    (void) config;
    (void) config_count;
    return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
}
+1 -66
Wyświetl plik
@@ -26,6 +26,7 @@
#include "lib/common/utility.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/context/domain.hpp"
#include "lib/rocprofiler/hsa/agent.hpp"
#include "lib/rocprofiler/hsa/hsa.hpp"
#include "lib/rocprofiler/registration.hpp"
@@ -56,70 +57,4 @@ rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts)
*ts = rocprofiler::common::timestamp_ns();
return ROCPROFILER_STATUS_SUCCESS;
}
// Reports a fixed pair of placeholder agents (one CPU, one GPU) to `callback` and
// returns the callback's status. `agent_size` is only checked by the assertion.
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
size_t agent_size,
void* user_data)
{
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
// the single PC sampling configuration advertised for GPU agents
static const auto _default_pc_config =
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
1UL,
1000000000UL,
0};
static const auto _dummy_pc_configs = pc_sampling_config_vec_t{_default_pc_config};
// function-local statics: the same two agent records are reused (and their
// pc_sampling_configs rewritten) on every call
static auto _default_cpu_agent = rocprofiler_agent_t{rocprofiler_agent_id_t{0},
ROCPROFILER_AGENT_TYPE_CPU,
"cpu",
rocprofiler_pc_sampling_config_array_t{}};
static auto _default_gpu_agent = rocprofiler_agent_t{rocprofiler_agent_id_t{1},
ROCPROFILER_AGENT_TYPE_GPU,
"gpu",
rocprofiler_pc_sampling_config_array_t{}};
// get the agents
auto _agents = std::vector<rocprofiler_agent_t*>{&_default_cpu_agent, &_default_gpu_agent};
// one configuration vector per agent; these locals own the arrays the agent
// records point at, so the pointers are only valid until this function returns
auto _pc_sampling_config = std::vector<pc_sampling_config_vec_t>{};
for(auto* itr : _agents)
{
auto& _data = _pc_sampling_config.emplace_back();
if(itr->type == ROCPROFILER_AGENT_TYPE_GPU) _data = {_default_pc_config};
itr->pc_sampling_configs =
rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()};
}
assert(agent_size <= sizeof(rocprofiler_agent_t) &&
"rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in "
"rocprofiler");
return callback(_agents.data(), _agents.size(), user_data);
}
// Placeholder for configuring the PC sampling service: reports
// ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED when the registration init status
// is greater than zero and ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED otherwise.
rocprofiler_status_t
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t         context_id,
                                          rocprofiler_agent_t              agent,
                                          rocprofiler_pc_sampling_method_t method,
                                          rocprofiler_pc_sampling_unit_t   unit,
                                          uint64_t                         interval,
                                          rocprofiler_buffer_id_t          buffer_id)
{
    // the arguments are intentionally ignored until the service is implemented
    consume_args(context_id, agent, method, unit, interval, buffer_id);

    const bool configuration_locked = (rocprofiler::registration::get_init_status() > 0);
    return configuration_locked ? ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED
                                : ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for querying the PC sampling configurations of an agent: always
// reports ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED without touching its arguments.
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t                      agent,
                                                   rocprofiler_pc_sampling_configuration_t* config,
                                                   size_t* config_count)
{
    // the arguments are intentionally ignored until the query is implemented
    (void) agent;
    (void) config;
    (void) config_count;
    return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
}