Agent Implementation (#78)
* Agent Implementation
* Remove unused Findrocprofiler
* Update lib/rocprofiler/hsa/agent.{hpp,cpp}
- default AgentInfo ctor
- getNumaNode() const
- noexcept move ctors
- default initializers for member variables
- fixed clang-tidy recommendations
- preallocate
- static in anon namespace
- AgentInfo::setName uses strncpy and ensures that it is terminated
* Update lib/rocprofiler/rocprofiler.cpp (agent.cpp and pc_sampling.cpp)
- move public PC sampling function implementations to pc_sampling.cpp
- move public agent function implementation to agent.cpp
This commit is contained in:
zatwierdzone przez
GitHub
rodzic
5c07deb159
commit
6fb9000fa1
@@ -1,97 +0,0 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying file
|
||||
# Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Fall back to the ROCM_PATH environment variable when the CMake variable is
# not set (read at configure time only).
if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(ROCM_PATH "$ENV{ROCM_PATH}")
endif()

# Collect every candidate install prefix that exists on disk, resolved to its
# real path. Expansions are quoted so that paths containing spaces are handled
# as a single argument.
foreach(_DIR ${rocm_version_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler)
    if(EXISTS "${_DIR}")
        get_filename_component(_ABS_DIR "${_DIR}" REALPATH)
        list(APPEND _ROCM_ROCPROFILER_PATHS "${_ABS_DIR}")
    endif()
endforeach()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_path(
|
||||
rocprofiler_ROOT_DIR
|
||||
NAMES include/rocprofiler/rocprofiler.h include/rocprofiler.h
|
||||
HINTS ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATHS ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATH_SUFFIXES rocprofiler)
|
||||
|
||||
mark_as_advanced(rocprofiler_ROOT_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_path(
|
||||
rocprofiler_INCLUDE_DIR
|
||||
NAMES rocprofiler.h
|
||||
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATH_SUFFIXES include include/rocprofiler rocprofiler/include)
|
||||
|
||||
mark_as_advanced(rocprofiler_INCLUDE_DIR)
|
||||
|
||||
find_path(
|
||||
rocprofiler_hsa_INCLUDE_DIR
|
||||
NAMES hsa.h
|
||||
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATH_SUFFIXES include include/hsa)
|
||||
|
||||
mark_as_advanced(rocprofiler_hsa_INCLUDE_DIR)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_library(
|
||||
rocprofiler_LIBRARY
|
||||
NAMES rocprofiler64 rocprofiler
|
||||
HINTS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR}
|
||||
${_ROCM_ROCPROFILER_PATHS}
|
||||
PATHS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR}
|
||||
${_ROCM_ROCPROFILER_PATHS}
|
||||
PATH_SUFFIXES lib lib64
|
||||
NO_DEFAULT_PATH)
|
||||
|
||||
find_library(
|
||||
rocprofiler_hsa-runtime_LIBRARY
|
||||
NAMES hsa-runtime64 hsa-runtime
|
||||
HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS}
|
||||
PATH_SUFFIXES lib lib64)
|
||||
|
||||
if(rocprofiler_LIBRARY)
|
||||
get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" PATH CACHE)
|
||||
endif()
|
||||
|
||||
mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)
|
||||
unset(_ROCM_ROCPROFILER_PATHS)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
find_package_handle_standard_args(
|
||||
rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR
|
||||
rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Publish the result variables and imported targets once the package is found.
if(rocprofiler_FOUND)
    set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR}
                                 ${rocprofiler_hsa_INCLUDE_DIR})
    set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY})
    set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR})

    # Guard target creation: add_library() on an already existing imported
    # target is a hard error, which would break a repeated find_package() call.
    if(NOT TARGET rocprofiler::rocprofiler)
        add_library(rocprofiler::rocprofiler INTERFACE IMPORTED)

        target_include_directories(
            rocprofiler::rocprofiler INTERFACE ${rocprofiler_INCLUDE_DIR}
                                               ${rocprofiler_hsa_INCLUDE_DIR})

        target_link_libraries(rocprofiler::rocprofiler INTERFACE ${rocprofiler_LIBRARIES})
    endif()

    # NOTE(review): this target is declared without any usage requirements,
    # exactly as before -- confirm whether it is still needed.
    if(NOT TARGET rocprofiler::roctx)
        add_library(rocprofiler::roctx INTERFACE IMPORTED)
    endif()
endif()
|
||||
@@ -120,7 +120,8 @@ endif()
|
||||
rocprofiler_target_compile_options(
|
||||
rocprofiler-developer-flags
|
||||
LANGUAGES C CXX
|
||||
INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra")
|
||||
INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra"
|
||||
"-Wno-missing-field-initializers")
|
||||
|
||||
if(ROCPROFILER_BUILD_DEVELOPER)
|
||||
target_link_libraries(rocprofiler-build-flags
|
||||
|
||||
@@ -154,3 +154,17 @@ target_link_libraries(rocprofiler-amd-comgr INTERFACE amd_comgr)
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
target_link_libraries(rocprofiler-ptl INTERFACE PTL::ptl-static)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# amd aql
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Locate the AQL profile library. HINTS/PATHS entries are searched as given,
# so PATH_SUFFIXES is needed to look inside <prefix>/lib and <prefix>/lib64.
find_library(
    hsa-amd-aqlprofile64_library
    NAMES hsa-amd-aqlprofile64 hsa-amd-aqlprofile
    HINTS ${rocm_version_DIR} ${ROCM_PATH}
    PATHS ${rocm_version_DIR} ${ROCM_PATH}
    PATH_SUFFIXES lib lib64)

mark_as_advanced(hsa-amd-aqlprofile64_library)

# Fail with an actionable message instead of linking against a -NOTFOUND value.
if(NOT hsa-amd-aqlprofile64_library)
    message(
        FATAL_ERROR
            "Could not find hsa-amd-aqlprofile64 (or hsa-amd-aqlprofile). "
            "Set ROCM_PATH or CMAKE_PREFIX_PATH to the ROCm installation.")
endif()

target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library})
|
||||
|
||||
@@ -50,3 +50,4 @@ rocprofiler_add_interface_library(rocprofiler-glog "Google Log library" INTERNAL
|
||||
rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL)
|
||||
|
||||
@@ -29,6 +29,7 @@ target_link_libraries(
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hip>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-amd-comgr>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-runtime>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>)
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>)
|
||||
set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME
|
||||
rocprofiler-common)
|
||||
|
||||
@@ -45,5 +45,21 @@ timestamp_ns()
|
||||
// TODO(jrmadsen): this should be updated to the HSA method
|
||||
return std::chrono::steady_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
// Read-only lookup in an associative container.
//
// Returns a pointer to the value mapped to `key`, or nullptr when the
// container holds no such key.
template <class Container, typename Key = typename Container::key_type>
const auto*
get_val(const Container& map, const Key& key)
{
    const auto itr     = map.find(key);
    using mapped_ptr_t = decltype(&itr->second);

    if(itr == map.end()) return mapped_ptr_t{nullptr};
    return &itr->second;
}
|
||||
|
||||
// Mutable lookup in an associative container.
//
// Returns a pointer through which the value mapped to `key` can be modified,
// or nullptr when the container holds no such key.
template <class Container, typename Key = typename Container::key_type>
auto*
get_val(Container& map, const Key& key)
{
    const auto itr     = map.find(key);
    using mapped_ptr_t = decltype(&itr->second);

    if(itr == map.end()) return mapped_ptr_t{nullptr};
    return &itr->second;
}
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -5,8 +5,15 @@ rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(ROCPROFILER_LIB_HEADERS buffer.hpp internal_threading.hpp registration.hpp)
|
||||
set(ROCPROFILER_LIB_SOURCES
|
||||
buffer.cpp buffer_tracing.cpp callback_tracing.cpp context.cpp internal_threading.cpp
|
||||
rocprofiler.cpp registration.cpp)
|
||||
agent.cpp
|
||||
buffer.cpp
|
||||
buffer_tracing.cpp
|
||||
callback_tracing.cpp
|
||||
context.cpp
|
||||
internal_threading.cpp
|
||||
pc_sampling.cpp
|
||||
rocprofiler.cpp
|
||||
registration.cpp)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <rocprofiler/agent.h>
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler/hsa/agent.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
extern "C" {
|
||||
// Enumerates the agents reported by rocprofiler::hsa::all_agents() and hands
// them to `callback` as an array of pointers to rocprofiler_agent_t.
//
// The agent records and their PC sampling configuration arrays are locals of
// this function, so the pointers passed to the callback are only valid for the
// duration of the callback invocation. The callback's return value is returned
// unchanged.
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
                                   size_t                            agent_size,
                                   void*                             user_data)
{
    using pc_config_t      = rocprofiler_pc_sampling_configuration_t;
    using pc_config_list_t = std::vector<pc_config_t>;

    // configuration advertised for every GPU agent
    static const auto default_gpu_pc_config = pc_config_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
                                                          ROCPROFILER_PC_SAMPLING_UNIT_TIME,
                                                          1UL,
                                                          1000000000UL,
                                                          0};

    // backing storage for the configuration arrays referenced by the records;
    // growing the outer vector moves the inner vectors, which keeps their
    // element buffers (and thus the stored data() pointers) in place
    auto config_storage = std::vector<pc_config_list_t>{};
    auto agent_records  = std::vector<rocprofiler_agent_t>{};

    for(const auto& hsa_agent : rocprofiler::hsa::all_agents())
    {
        auto& configs = config_storage.emplace_back();
        if(hsa_agent.isGpu()) configs = {default_gpu_pc_config};

        auto agent_type = ROCPROFILER_AGENT_TYPE_NONE;
        if(hsa_agent.isCpu())
            agent_type = ROCPROFILER_AGENT_TYPE_CPU;
        else if(hsa_agent.isGpu())
            agent_type = ROCPROFILER_AGENT_TYPE_GPU;

        // the handle is the position of the record in the list
        agent_records.emplace_back(rocprofiler_agent_t{
            .id   = rocprofiler_agent_id_t{.handle = agent_records.size()},
            .type = agent_type,
            .name = hsa_agent.getNameChar(),
            .pc_sampling_configs =
                rocprofiler_pc_sampling_config_array_t{configs.data(), configs.size()}});
    }

    auto agent_ptrs = std::vector<rocprofiler_agent_t*>{};
    agent_ptrs.reserve(agent_records.size());
    for(auto& record : agent_records)
        agent_ptrs.emplace_back(&record);

    assert(agent_size <= sizeof(rocprofiler_agent_t) &&
           "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in "
           "rocprofiler");
    return callback(agent_ptrs.data(), agent_ptrs.size(), user_data);
}
|
||||
}
|
||||
@@ -1,9 +1,5 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp)
|
||||
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp agent.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp agent.hpp)
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
|
||||
${ROCPROFILER_LIB_HSA_HEADERS})
|
||||
|
||||
|
||||
@@ -0,0 +1,499 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "agent.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
namespace
|
||||
{
|
||||
std::unordered_map<long long, long long>
|
||||
get_gpu_nodes_near_cpu()
|
||||
{
|
||||
std::unordered_map<long long, long long> gpu_numa_nodes_near_cpu;
|
||||
long long gpu_numa_nodes_start = 0;
|
||||
|
||||
std::string path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
for(const auto& entry : fs::directory_iterator(path))
|
||||
{
|
||||
long long node_id = std::stoll(entry.path().filename().c_str());
|
||||
std::ifstream gpu_id_file;
|
||||
std::string gpu_path = entry.path().c_str();
|
||||
gpu_path += "/gpu_id";
|
||||
gpu_id_file.open(gpu_path);
|
||||
std::string gpu_id_str;
|
||||
if(gpu_id_file.is_open())
|
||||
{
|
||||
gpu_id_file >> gpu_id_str;
|
||||
|
||||
if(!gpu_id_str.empty())
|
||||
{
|
||||
auto gpu_id = std::stoll(gpu_id_str);
|
||||
if(gpu_id > 0 && (gpu_numa_nodes_start > node_id || gpu_numa_nodes_start == 0))
|
||||
{
|
||||
gpu_numa_nodes_start = node_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
gpu_id_file.close();
|
||||
}
|
||||
|
||||
path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
for(const auto& entry : fs::directory_iterator(path))
|
||||
{
|
||||
long long node_id = std::stoll(entry.path().filename().c_str());
|
||||
std::string numa_node_path = entry.path().c_str();
|
||||
long long agent_id = std::stoll(entry.path().filename().c_str());
|
||||
if(agent_id >= gpu_numa_nodes_start)
|
||||
{
|
||||
numa_node_path += "/io_links";
|
||||
for(const auto& numa_node_entry : fs::directory_iterator(numa_node_path))
|
||||
{
|
||||
std::string numa_node_entry_properties_path = numa_node_entry.path().c_str();
|
||||
numa_node_entry_properties_path += "/properties";
|
||||
std::ifstream gpu_properties_file;
|
||||
gpu_properties_file.open(numa_node_entry_properties_path);
|
||||
std::string gpu_properties_file_line;
|
||||
if(gpu_properties_file.is_open())
|
||||
{
|
||||
while(gpu_properties_file)
|
||||
{
|
||||
std::getline(gpu_properties_file, gpu_properties_file_line);
|
||||
std::string delimiter = " ";
|
||||
std::stringstream ss(gpu_properties_file_line);
|
||||
std::string word;
|
||||
ss >> word;
|
||||
if(word == "node_to")
|
||||
{
|
||||
ss >> word;
|
||||
long long near_cpu_node_id = std::stoll(word);
|
||||
if(near_cpu_node_id < gpu_numa_nodes_start)
|
||||
{
|
||||
gpu_numa_nodes_near_cpu[node_id] = near_cpu_node_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
gpu_properties_file.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
return gpu_numa_nodes_near_cpu;
|
||||
}
|
||||
|
||||
// This function checks to see if the provided
|
||||
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
|
||||
// the function adds an additional requirement that the pool have the
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
|
||||
// pools must NOT have this property.
|
||||
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
hsa_status_t
|
||||
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg)
|
||||
{
|
||||
if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
auto [api_ptr, pool_ptr] =
|
||||
*static_cast<std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>*>(data);
|
||||
hsa_amd_segment_t segment;
|
||||
LOG_IF(FATAL,
|
||||
api_ptr->hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) == HSA_STATUS_ERROR)
|
||||
<< "Could not get pool segment";
|
||||
if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flag;
|
||||
LOG_IF(FATAL,
|
||||
api_ptr->hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) == HSA_STATUS_ERROR)
|
||||
<< "Could not get flag value";
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg))
|
||||
{
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
*(pool_ptr) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t
|
||||
FindStandardPool(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
return FindGlobalPool(pool, data, false);
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t
|
||||
FindKernArgPool(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
void
|
||||
init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent)
|
||||
{
|
||||
CHECK(!cpu_agent.isGpu());
|
||||
auto params = std::make_pair(&api, &cpu_agent.cpu_pool);
|
||||
|
||||
auto status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, ¶ms);
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: Command Buffer Pool is not initialized";
|
||||
|
||||
params.second = &cpu_agent.kernarg_pool;
|
||||
status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params));
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: Output Buffer Pool is not initialized";
|
||||
}
|
||||
|
||||
void
|
||||
init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info)
|
||||
{
|
||||
CHECK(agent_info.isGpu());
|
||||
auto params = std::make_pair(&api, &agent_info.gpu_pool);
|
||||
auto status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, ¶ms);
|
||||
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: GPU Pool is not initialized";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
const std::vector<AgentInfo>&
|
||||
all_agents()
|
||||
{
|
||||
static std::shared_ptr<const std::vector<AgentInfo>> agents = AgentInfo::getAgents(
|
||||
{.hsa_iterate_agents_fn = hsa_iterate_agents, .hsa_agent_get_info_fn = hsa_agent_get_info},
|
||||
{.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info,
|
||||
.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools,
|
||||
.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate,
|
||||
.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free,
|
||||
.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access});
|
||||
return *agents;
|
||||
}
|
||||
|
||||
std::shared_ptr<const std::vector<AgentInfo>>
|
||||
AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api)
|
||||
{
|
||||
std::vector<hsa_agent_t> agents;
|
||||
std::shared_ptr<std::vector<AgentInfo>> agent_info_ptr =
|
||||
std::make_shared<std::vector<AgentInfo>>();
|
||||
auto& agent_info = *agent_info_ptr;
|
||||
|
||||
api.hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* data) {
|
||||
CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
|
||||
auto near_gpu_map = get_gpu_nodes_near_cpu();
|
||||
std::unordered_map<int64_t, AgentInfo*> cpu_id_to_agent;
|
||||
|
||||
// Reserve is required to prevent reallocation (which breaks cpu_id_to_agent)
|
||||
agent_info.reserve(agents.size());
|
||||
for(auto& agent : agents)
|
||||
{
|
||||
auto& new_agent = agent_info.emplace_back(agent, api);
|
||||
if(!new_agent.isGpu())
|
||||
{
|
||||
uint32_t cpu_numa_node_id;
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch numa info";
|
||||
new_agent.setNumaNode(cpu_numa_node_id);
|
||||
cpu_id_to_agent[cpu_numa_node_id] = &new_agent;
|
||||
init_cpu_pool(ext_api, new_agent);
|
||||
}
|
||||
else if(new_agent.isGpu())
|
||||
{
|
||||
uint32_t node_id;
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(
|
||||
agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&node_id) != HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch driver node id";
|
||||
new_agent.setIndex(node_id);
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NODE),
|
||||
&node_id) != HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch driver node id";
|
||||
new_agent.setNumaNode(node_id);
|
||||
init_gpu_pool(ext_api, new_agent);
|
||||
}
|
||||
}
|
||||
|
||||
// Sperate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the first
|
||||
// NUMA nodes)
|
||||
for(auto& agent : agent_info)
|
||||
{
|
||||
if(agent.isGpu())
|
||||
{
|
||||
auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode());
|
||||
LOG_IF(FATAL, !near_gpu) << fmt::format("No CPU Agent near GPU Agent: {} {}", agent);
|
||||
|
||||
auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu);
|
||||
LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu);
|
||||
agent.setNearCpuAgent((*id_to_agent)->getAgent());
|
||||
agent.cpu_pool = (*id_to_agent)->cpu_pool;
|
||||
agent.kernarg_pool = (*id_to_agent)->kernarg_pool;
|
||||
}
|
||||
}
|
||||
return agent_info_ptr;
|
||||
}
|
||||
|
||||
// Queries the properties of `agent` through the HSA core API table.
//
// The device type and name are read for every agent; gfxip_ is the name with
// its last two characters removed. The remaining (hardware configuration and
// PCI) queries are only made for GPU agents. A failing device type, shader
// array, waves-per-CU or PCI query is logged as FATAL.
AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table)
: handle_(agent.handle)
, agent_(agent)
{
    if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info failed";
    }

    // name_ stays empty when the query fails
    LOG_IF(WARNING,
           table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_) != HSA_STATUS_SUCCESS)
        << "hsa_agent_get_info failed to provide the agent name";
    name_[sizeof(name_) - 1] = '\0';  // never read past the buffer below

    // Drop the last two characters of the name. Guard the subtraction: names
    // shorter than two characters would otherwise wrap around (size_t).
    const size_t name_len        = strlen(name_);
    const size_t gfxip_label_len = std::min((name_len > 2 ? name_len - 2 : size_t{0}),
                                            sizeof(gfxip_) - 1);
    memcpy(gfxip_, name_, gfxip_label_len);
    gfxip_[gfxip_label_len] = '\0';

    if(type_ != HSA_DEVICE_TYPE_GPU)
    {
        return;
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);

    if(table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
           &shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
           &waves_per_cu_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed";
    }

    // The unchecked queries above leave their outputs at zero on failure, so
    // guard the divisions instead of dividing by zero.
    const uint32_t shader_array_count = se_num_ * shader_arrays_per_se_;
    compute_units_per_sh_ = (shader_array_count != 0 ? cu_num_ / shader_array_count : 0);
    wave_slots_per_simd_  = (simds_per_cu_ != 0 ? waves_per_cu_ / simds_per_cu_ : 0);

    if(table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN),
                                   &pci_domain_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID),
                                   &pci_location_id_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for PCI info failed";
    }
}
|
||||
|
||||
uint64_t
|
||||
AgentInfo::getIndex() const
|
||||
{
|
||||
return index_;
|
||||
}
|
||||
|
||||
hsa_device_type_t
|
||||
AgentInfo::getType() const
|
||||
{
|
||||
return type_;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
AgentInfo::getHandle() const
|
||||
{
|
||||
return handle_;
|
||||
}
|
||||
|
||||
const std::string_view
|
||||
AgentInfo::getName() const
|
||||
{
|
||||
return name_;
|
||||
}
|
||||
|
||||
std::string
|
||||
AgentInfo::getGfxip() const
|
||||
{
|
||||
return std::string(gfxip_);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxWaveSize() const
|
||||
{
|
||||
return max_wave_size_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxQueueSize() const
|
||||
{
|
||||
return max_queue_size_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getCUCount() const
|
||||
{
|
||||
return cu_num_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getSimdCountPerCU() const
|
||||
{
|
||||
return simds_per_cu_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getShaderEngineCount() const
|
||||
{
|
||||
return se_num_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getShaderArraysPerSE() const
|
||||
{
|
||||
return shader_arrays_per_se_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxWavesPerCU() const
|
||||
{
|
||||
return waves_per_cu_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getCUCountPerSH() const
|
||||
{
|
||||
return compute_units_per_sh_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getWaveSlotsPerSimd() const
|
||||
{
|
||||
return wave_slots_per_simd_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getPCIDomain() const
|
||||
{
|
||||
return pci_domain_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getPCILocationID() const
|
||||
{
|
||||
return pci_location_id_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getXccCount() const
|
||||
{
|
||||
return xcc_num_;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setIndex(uint64_t index)
|
||||
{
|
||||
index_ = index;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setType(hsa_device_type_t type)
|
||||
{
|
||||
type_ = type;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setHandle(uint64_t handle)
|
||||
{
|
||||
handle_ = handle;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setName(const std::string& name)
|
||||
{
|
||||
constexpr auto name_len = sizeof(name_) / sizeof(char);
|
||||
//
|
||||
// char* strncpy(char* destination, const char* source, size_t num)
|
||||
//
|
||||
// If the end of the source string (which is signaled by a null-character) is found before num
|
||||
// characters have been copied, destination is padded with zeros until a total of num characters
|
||||
// have been written to it
|
||||
strncpy(name_, name.c_str(), name_len - 2);
|
||||
// ensure always terminated
|
||||
name_[name_len - 1] = '\0';
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setNumaNode(uint32_t numa_node)
|
||||
{
|
||||
numa_node_ = numa_node;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getNumaNode() const
|
||||
{
|
||||
return numa_node_;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent)
|
||||
{
|
||||
near_cpu_agent_ = near_cpu_agent;
|
||||
}
|
||||
|
||||
hsa_agent_t
|
||||
AgentInfo::getNearCpuAgent()
|
||||
{
|
||||
return near_cpu_agent_;
|
||||
}
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,174 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include "fmt/core.h"
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
|
||||
|
||||
class AgentInfo
|
||||
{
|
||||
public:
|
||||
AgentInfo() = default;
|
||||
AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table);
|
||||
uint64_t getIndex() const;
|
||||
hsa_device_type_t getType() const;
|
||||
bool isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; }
|
||||
bool isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; }
|
||||
uint64_t getHandle() const;
|
||||
const std::string_view getName() const;
|
||||
const char* getNameChar() const { return name_; }
|
||||
std::string getGfxip() const;
|
||||
uint32_t getMaxWaveSize() const;
|
||||
uint32_t getMaxQueueSize() const;
|
||||
uint32_t getCUCount() const;
|
||||
uint32_t getSimdCountPerCU() const;
|
||||
uint32_t getShaderEngineCount() const;
|
||||
uint32_t getShaderArraysPerSE() const;
|
||||
uint32_t getMaxWavesPerCU() const;
|
||||
uint32_t getCUCountPerSH() const;
|
||||
uint32_t getWaveSlotsPerSimd() const;
|
||||
uint32_t getPCIDomain() const;
|
||||
uint32_t getPCILocationID() const;
|
||||
uint32_t getXccCount() const;
|
||||
|
||||
void setIndex(uint64_t index);
|
||||
void setType(hsa_device_type_t type);
|
||||
void setHandle(uint64_t handle);
|
||||
void setName(const std::string& name);
|
||||
|
||||
void setNumaNode(uint32_t numa_node);
|
||||
uint32_t getNumaNode() const;
|
||||
|
||||
void setNearCpuAgent(hsa_agent_t near_cpu_agent);
|
||||
hsa_agent_t getNearCpuAgent();
|
||||
hsa_agent_t getAgent() const { return agent_; }
|
||||
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
|
||||
static std::shared_ptr<const std::vector<AgentInfo>> getAgents(const CoreApiTable&,
|
||||
const AmdExtTable&);
|
||||
|
||||
// Keep move constuctors (i.e. std::move())
|
||||
AgentInfo(AgentInfo&& other) noexcept = default;
|
||||
AgentInfo& operator=(AgentInfo&& other) noexcept = default;
|
||||
|
||||
// Do not allow copying this class
|
||||
AgentInfo(const AgentInfo&) = delete;
|
||||
AgentInfo& operator=(const AgentInfo&) = delete;
|
||||
|
||||
private:
|
||||
uint64_t index_ = 0;
|
||||
hsa_device_type_t type_ = HSA_DEVICE_TYPE_CPU; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint64_t handle_ = 0;
|
||||
char name_[64] = {'\0'};
|
||||
char gfxip_[64] = {'\0'};
|
||||
uint32_t max_wave_size_ = 0;
|
||||
uint32_t max_queue_size_ = 0;
|
||||
uint32_t cu_num_ = 0;
|
||||
uint32_t simds_per_cu_ = 0;
|
||||
uint32_t se_num_ = 0;
|
||||
uint32_t shader_arrays_per_se_ = 0;
|
||||
uint32_t waves_per_cu_ = 0;
|
||||
// CUs per SH/SA
|
||||
uint32_t compute_units_per_sh_ = 0;
|
||||
uint32_t wave_slots_per_simd_ = 0;
|
||||
// Number of XCCs on the GPU
|
||||
uint32_t xcc_num_ = 0;
|
||||
|
||||
uint32_t pci_domain_ = 0;
|
||||
uint32_t pci_location_id_ = 0;
|
||||
|
||||
uint32_t numa_node_ = 0;
|
||||
hsa_agent_t near_cpu_agent_ = {};
|
||||
hsa_agent_t agent_ = {};
|
||||
};
|
||||
|
||||
const std::vector<AgentInfo>&
|
||||
all_agents();
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace fmt
|
||||
{
|
||||
template <>
|
||||
struct formatter<rocprofiler::hsa::AgentInfo>
|
||||
{
|
||||
template <typename ParseContext>
|
||||
constexpr auto parse(ParseContext& ctx)
|
||||
{
|
||||
return ctx.begin();
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const
|
||||
{
|
||||
auto device_type_name = [](auto dev) -> std::string_view {
|
||||
switch(dev)
|
||||
{
|
||||
case HSA_DEVICE_TYPE_CPU: return "CPU";
|
||||
case HSA_DEVICE_TYPE_GPU: return "GPU";
|
||||
case HSA_DEVICE_TYPE_DSP: return "DSP";
|
||||
}
|
||||
return "UNKNOWN";
|
||||
};
|
||||
|
||||
return fmt::format_to(
|
||||
ctx.out(),
|
||||
R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})",
|
||||
agent.getIndex(),
|
||||
device_type_name(agent.getType()),
|
||||
agent.getHandle(),
|
||||
agent.getName(),
|
||||
agent.getGfxip(),
|
||||
agent.getMaxWaveSize(),
|
||||
agent.getMaxQueueSize(),
|
||||
agent.getCUCount(),
|
||||
agent.getSimdCountPerCU(),
|
||||
agent.getShaderEngineCount(),
|
||||
agent.getShaderArraysPerSE(),
|
||||
agent.getMaxWavesPerCU(),
|
||||
agent.getCUCountPerSH(),
|
||||
agent.getWaveSlotsPerSimd(),
|
||||
agent.getPCIDomain(),
|
||||
agent.getPCILocationID(),
|
||||
agent.getXccCount());
|
||||
}
|
||||
};
|
||||
} // namespace fmt
|
||||
@@ -0,0 +1,60 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/pc_sampling.h>
|
||||
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
|
||||
namespace
{
// Accepts any number of arguments of any type and does nothing with them.
// Presumably used to mark parameters of not-yet-implemented functions as used.
template <typename... Args>
void
consume_args(Args&&...)
{}
}  // namespace
|
||||
|
||||
extern "C" {
|
||||
// Configures the PC sampling service (not implemented yet).
//
// Returns ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED when
// rocprofiler::registration::get_init_status() is greater than zero, otherwise
// ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED.
rocprofiler_status_t
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t         context_id,
                                          rocprofiler_agent_t              agent,
                                          rocprofiler_pc_sampling_method_t method,
                                          rocprofiler_pc_sampling_unit_t   unit,
                                          uint64_t                         interval,
                                          rocprofiler_buffer_id_t          buffer_id)
{
    // A positive init status means the configuration can no longer be changed.
    const bool config_locked = (rocprofiler::registration::get_init_status() > 0);
    if(config_locked)
    {
        return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
    }

    // Nothing is done with the arguments yet.
    consume_args(context_id, agent, method, unit, interval, buffer_id);
    return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
|
||||
|
||||
// Queries the PC sampling configurations of an agent (not implemented yet).
//
// Always returns ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; `config` and
// `config_count` are never written.
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t                      agent,
                                                   rocprofiler_pc_sampling_configuration_t* config,
                                                   size_t* config_count)
{
    const rocprofiler_status_t status = ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;

    // Nothing is done with the arguments yet.
    consume_args(agent, config, config_count);
    return status;
}
|
||||
}
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/context/domain.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent.hpp"
|
||||
#include "lib/rocprofiler/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
|
||||
@@ -56,70 +57,4 @@ rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts)
|
||||
*ts = rocprofiler::common::timestamp_ns();
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Reports the (currently hard-coded) CPU and GPU agents to `callback`.
//
// callback   - invoked once with an array of agent pointers, the number of
//              agents and `user_data`; its return value is returned unchanged.
// agent_size - sizeof(rocprofiler_agent_t) as seen by the caller; only checked
//              by an assert in debug builds.
// user_data  - opaque pointer forwarded to `callback`.
//
// The agent descriptors and the PC sampling configurations they reference have
// static storage duration, so the pointers handed to the callback stay valid
// after this function returns.
// NOTE(review): `callback` is not checked for null, and the static agents are
// rewritten on every call without synchronization.
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
                                   size_t                            agent_size,
                                   void*                             user_data)
{
    using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;

    static const auto _default_pc_config =
        rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
                                                ROCPROFILER_PC_SAMPLING_UNIT_TIME,
                                                1UL,
                                                1000000000UL,
                                                0};

    // Static backing storage for the GPU agent's configurations. A
    // function-local vector here would be destroyed on return and leave the
    // static agents below holding a dangling pointer.
    static auto _gpu_pc_configs = pc_sampling_config_vec_t{_default_pc_config};

    static auto _default_cpu_agent = rocprofiler_agent_t{rocprofiler_agent_id_t{0},
                                                         ROCPROFILER_AGENT_TYPE_CPU,
                                                         "cpu",
                                                         rocprofiler_pc_sampling_config_array_t{}};
    static auto _default_gpu_agent = rocprofiler_agent_t{rocprofiler_agent_id_t{1},
                                                         ROCPROFILER_AGENT_TYPE_GPU,
                                                         "gpu",
                                                         rocprofiler_pc_sampling_config_array_t{}};

    // get the agents
    auto _agents = std::vector<rocprofiler_agent_t*>{&_default_cpu_agent, &_default_gpu_agent};

    // GPU agents advertise the default configuration; every other agent
    // advertises an empty configuration array.
    for(auto* itr : _agents)
    {
        if(itr->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            itr->pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{
                _gpu_pc_configs.data(), _gpu_pc_configs.size()};
        }
        else
        {
            itr->pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{};
        }
    }

    assert(agent_size <= sizeof(rocprofiler_agent_t) &&
           "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in "
           "rocprofiler");
    // agent_size is otherwise unused when NDEBUG removes the assert above.
    static_cast<void>(agent_size);

    return callback(_agents.data(), _agents.size(), user_data);
}
|
||||
|
||||
// Configures the PC sampling service (not implemented yet).
//
// Returns ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED when
// rocprofiler::registration::get_init_status() is greater than zero, otherwise
// ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED.
rocprofiler_status_t
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t         context_id,
                                          rocprofiler_agent_t              agent,
                                          rocprofiler_pc_sampling_method_t method,
                                          rocprofiler_pc_sampling_unit_t   unit,
                                          uint64_t                         interval,
                                          rocprofiler_buffer_id_t          buffer_id)
{
    // A positive init status means the configuration can no longer be changed.
    const bool config_locked = (rocprofiler::registration::get_init_status() > 0);
    if(config_locked)
    {
        return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
    }

    // Nothing is done with the arguments yet.
    consume_args(context_id, agent, method, unit, interval, buffer_id);
    return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
|
||||
|
||||
// Queries the PC sampling configurations of an agent (not implemented yet).
//
// Always returns ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; `config` and
// `config_count` are never written.
rocprofiler_status_t
rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t                      agent,
                                                   rocprofiler_pc_sampling_configuration_t* config,
                                                   size_t* config_count)
{
    const rocprofiler_status_t status = ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;

    // Nothing is done with the arguments yet.
    consume_args(agent, config, config_count);
    return status;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user