From 6fb9000fa1cfd789c727018dc2244cdf6a3a6766 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Fri, 22 Sep 2023 13:51:21 -0500 Subject: [PATCH] Agent Implementation (#78) * Agent Implementation * Remove unused Findrocprofiler * Update lib/rocprofiler/hsa/agent.{hpp,cpp} - default AgentInfo ctor - getNumaNode() const - noexcept move ctors - default initializers for member variables - fixed clang-tidy recommentations - preallocate - static in anon namespace - AgentInfo::setName uses strncpy and ensures that it is terminated * Update lib/rocprofiler/rocprofiler.cpp (agent.cpp and pc_sampling.cpp) - move public PC sampling function implementations to pc_sampling.cpp - move public agent function implementation to agent.cpp --- cmake/Modules/Findrocprofiler.cmake | 97 ----- cmake/rocprofiler_build_settings.cmake | 3 +- cmake/rocprofiler_config_interfaces.cmake | 14 + cmake/rocprofiler_interfaces.cmake | 1 + source/lib/common/CMakeLists.txt | 3 +- source/lib/common/utility.hpp | 16 + source/lib/rocprofiler/CMakeLists.txt | 11 +- source/lib/rocprofiler/agent.cpp | 78 ++++ source/lib/rocprofiler/hsa/CMakeLists.txt | 8 +- source/lib/rocprofiler/hsa/agent.cpp | 499 ++++++++++++++++++++++ source/lib/rocprofiler/hsa/agent.hpp | 174 ++++++++ source/lib/rocprofiler/pc_sampling.cpp | 60 +++ source/lib/rocprofiler/rocprofiler.cpp | 67 +-- 13 files changed, 858 insertions(+), 173 deletions(-) delete mode 100644 cmake/Modules/Findrocprofiler.cmake create mode 100644 source/lib/rocprofiler/agent.cpp create mode 100644 source/lib/rocprofiler/hsa/agent.cpp create mode 100644 source/lib/rocprofiler/hsa/agent.hpp create mode 100644 source/lib/rocprofiler/pc_sampling.cpp diff --git a/cmake/Modules/Findrocprofiler.cmake b/cmake/Modules/Findrocprofiler.cmake deleted file mode 100644 index fea224fc1a..0000000000 --- a/cmake/Modules/Findrocprofiler.cmake +++ /dev/null @@ -1,97 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. 
See accompanying file -# Copyright.txt or https://cmake.org/licensing for details. - -include(FindPackageHandleStandardArgs) - -# ----------------------------------------------------------------------------------------# - -if(NOT ROCM_PATH AND NOT "$ENV{ROCM_PATH}" STREQUAL "") - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -foreach(_DIR ${rocm_version_DIR} ${ROCM_PATH} /opt/rocm /opt/rocm/rocprofiler) - if(EXISTS ${_DIR}) - get_filename_component(_ABS_DIR "${_DIR}" REALPATH) - list(APPEND _ROCM_ROCPROFILER_PATHS ${_ABS_DIR}) - endif() -endforeach() - -# ----------------------------------------------------------------------------------------# - -find_path( - rocprofiler_ROOT_DIR - NAMES include/rocprofiler/rocprofiler.h include/rocprofiler.h - HINTS ${_ROCM_ROCPROFILER_PATHS} - PATHS ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES rocprofiler) - -mark_as_advanced(rocprofiler_ROOT_DIR) - -# ----------------------------------------------------------------------------------------# - -find_path( - rocprofiler_INCLUDE_DIR - NAMES rocprofiler.h - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES include include/rocprofiler rocprofiler/include) - -mark_as_advanced(rocprofiler_INCLUDE_DIR) - -find_path( - rocprofiler_hsa_INCLUDE_DIR - NAMES hsa.h - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES include include/hsa) - -mark_as_advanced(rocprofiler_hsa_INCLUDE_DIR) - -# ----------------------------------------------------------------------------------------# - -find_library( - rocprofiler_LIBRARY - NAMES rocprofiler64 rocprofiler - HINTS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR} - ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR}/rocprofiler ${rocprofiler_ROOT_DIR} - ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES lib lib64 - NO_DEFAULT_PATH) - -find_library( - rocprofiler_hsa-runtime_LIBRARY - 
NAMES hsa-runtime64 hsa-runtime - HINTS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATHS ${rocprofiler_ROOT_DIR} ${_ROCM_ROCPROFILER_PATHS} - PATH_SUFFIXES lib lib64) - -if(rocprofiler_LIBRARY) - get_filename_component(rocprofiler_LIBRARY_DIR "${rocprofiler_LIBRARY}" PATH CACHE) -endif() - -mark_as_advanced(rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY) -unset(_ROCM_ROCPROFILER_PATHS) - -# ----------------------------------------------------------------------------------------# - -find_package_handle_standard_args( - rocprofiler DEFAULT_MSG rocprofiler_ROOT_DIR rocprofiler_INCLUDE_DIR - rocprofiler_hsa_INCLUDE_DIR rocprofiler_LIBRARY rocprofiler_hsa-runtime_LIBRARY) - -# ----------------------------------------------------------------------------------------# - -if(rocprofiler_FOUND) - add_library(rocprofiler::rocprofiler INTERFACE IMPORTED) - add_library(rocprofiler::roctx INTERFACE IMPORTED) - set(rocprofiler_INCLUDE_DIRS ${rocprofiler_INCLUDE_DIR} - ${rocprofiler_hsa_INCLUDE_DIR}) - set(rocprofiler_LIBRARIES ${rocprofiler_LIBRARY} ${rocprofiler_hsa-runtime_LIBRARY}) - set(rocprofiler_LIBRARY_DIRS ${rocprofiler_LIBRARY_DIR}) - - target_include_directories( - rocprofiler::rocprofiler INTERFACE ${rocprofiler_INCLUDE_DIR} - ${rocprofiler_hsa_INCLUDE_DIR}) - - target_link_libraries(rocprofiler::rocprofiler INTERFACE ${rocprofiler_LIBRARIES}) -endif() diff --git a/cmake/rocprofiler_build_settings.cmake b/cmake/rocprofiler_build_settings.cmake index 10e851087b..01c5852e2e 100644 --- a/cmake/rocprofiler_build_settings.cmake +++ b/cmake/rocprofiler_build_settings.cmake @@ -120,7 +120,8 @@ endif() rocprofiler_target_compile_options( rocprofiler-developer-flags LANGUAGES C CXX - INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra") + INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra" + "-Wno-missing-field-initializers") if(ROCPROFILER_BUILD_DEVELOPER) target_link_libraries(rocprofiler-build-flags diff --git 
a/cmake/rocprofiler_config_interfaces.cmake b/cmake/rocprofiler_config_interfaces.cmake index ef08da847e..ea6b0c8f8d 100644 --- a/cmake/rocprofiler_config_interfaces.cmake +++ b/cmake/rocprofiler_config_interfaces.cmake @@ -154,3 +154,17 @@ target_link_libraries(rocprofiler-amd-comgr INTERFACE amd_comgr) # ----------------------------------------------------------------------------------------# target_link_libraries(rocprofiler-ptl INTERFACE PTL::ptl-static) + +# ----------------------------------------------------------------------------------------# +# +# amd aql +# +# ----------------------------------------------------------------------------------------# + +find_library( + hsa-amd-aqlprofile64_library + NAMES hsa-amd-aqlprofile64 hsa-amd-aqlprofile + HINTS ${rocm_version_DIR} ${ROCM_PATH} + PATHS ${rocm_version_DIR} ${ROCM_PATH}) + +target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library}) diff --git a/cmake/rocprofiler_interfaces.cmake b/cmake/rocprofiler_interfaces.cmake index 0a038d8d69..0a777cf05e 100644 --- a/cmake/rocprofiler_interfaces.cmake +++ b/cmake/rocprofiler_interfaces.cmake @@ -50,3 +50,4 @@ rocprofiler_add_interface_library(rocprofiler-glog "Google Log library" INTERNAL rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL) +rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL) diff --git a/source/lib/common/CMakeLists.txt b/source/lib/common/CMakeLists.txt index b918440a20..f67ca666fb 100644 --- a/source/lib/common/CMakeLists.txt +++ b/source/lib/common/CMakeLists.txt @@ -29,6 +29,7 @@ target_link_libraries( $ $ $ - $) + $ + $) set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME rocprofiler-common) diff --git a/source/lib/common/utility.hpp 
b/source/lib/common/utility.hpp index 9dac154279..5462121ce6 100644 --- a/source/lib/common/utility.hpp +++ b/source/lib/common/utility.hpp @@ -45,5 +45,21 @@ timestamp_ns() // TODO(jrmadsen): this should be updated to the HSA method return std::chrono::steady_clock::now().time_since_epoch().count(); } + +template +const auto* +get_val(const Container& map, const Key& key) +{ + auto pos = map.find(key); + return (pos != map.end() ? &pos->second : nullptr); +} + +template +auto* +get_val(Container& map, const Key& key) +{ + auto pos = map.find(key); + return (pos != map.end() ? &pos->second : nullptr); +} } // namespace common } // namespace rocprofiler diff --git a/source/lib/rocprofiler/CMakeLists.txt b/source/lib/rocprofiler/CMakeLists.txt index ddcef22188..cd4411eaf0 100644 --- a/source/lib/rocprofiler/CMakeLists.txt +++ b/source/lib/rocprofiler/CMakeLists.txt @@ -5,8 +5,15 @@ rocprofiler_activate_clang_tidy() set(ROCPROFILER_LIB_HEADERS buffer.hpp internal_threading.hpp registration.hpp) set(ROCPROFILER_LIB_SOURCES - buffer.cpp buffer_tracing.cpp callback_tracing.cpp context.cpp internal_threading.cpp - rocprofiler.cpp registration.cpp) + agent.cpp + buffer.cpp + buffer_tracing.cpp + callback_tracing.cpp + context.cpp + internal_threading.cpp + pc_sampling.cpp + rocprofiler.cpp + registration.cpp) # ----------------------------------------------------------------------------------------# # diff --git a/source/lib/rocprofiler/agent.cpp b/source/lib/rocprofiler/agent.cpp new file mode 100644 index 0000000000..c0397bea48 --- /dev/null +++ b/source/lib/rocprofiler/agent.cpp @@ -0,0 +1,78 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include + +#include "lib/rocprofiler/hsa/agent.hpp" + +#include + +extern "C" { +rocprofiler_status_t +rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback, + size_t agent_size, + void* user_data) +{ + using pc_sampling_config_vec_t = std::vector; + + auto pc_sampling_configs = std::vector{}; + auto get_agents = [&pc_sampling_configs]() { + static const auto _default_pc_config = + rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP, + ROCPROFILER_PC_SAMPLING_UNIT_TIME, + 1UL, + 1000000000UL, + 0}; + auto temporaries_ = std::vector{}; + const auto& agent_info = rocprofiler::hsa::all_agents(); + for(const auto& agent : agent_info) + { + auto& _data = pc_sampling_configs.emplace_back(); + if(agent.isGpu()) _data = {_default_pc_config}; + temporaries_.emplace_back(rocprofiler_agent_t{ + .id = rocprofiler_agent_id_t{.handle = temporaries_.size()}, + .type = (agent.isCpu() ? ROCPROFILER_AGENT_TYPE_CPU + : (agent.isGpu() ? 
ROCPROFILER_AGENT_TYPE_GPU + : ROCPROFILER_AGENT_TYPE_NONE)), + .name = agent.getNameChar(), + .pc_sampling_configs = + rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()}}); + } + return temporaries_; + }; + + auto agents = get_agents(); + auto pointers = std::vector{}; + pointers.reserve(agents.size()); + for(auto& agent : agents) + { + pointers.emplace_back(&agent); + } + + assert(agent_size <= sizeof(rocprofiler_agent_t) && + "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in " + "rocprofiler"); + return callback(pointers.data(), pointers.size(), user_data); +} +} diff --git a/source/lib/rocprofiler/hsa/CMakeLists.txt b/source/lib/rocprofiler/hsa/CMakeLists.txt index 9ac68ecac2..54b6ea3d39 100644 --- a/source/lib/rocprofiler/hsa/CMakeLists.txt +++ b/source/lib/rocprofiler/hsa/CMakeLists.txt @@ -1,9 +1,5 @@ -# -# -# -set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp) -set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp) - +set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp agent.cpp) +set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp agent.hpp) target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES} ${ROCPROFILER_LIB_HSA_HEADERS}) diff --git a/source/lib/rocprofiler/hsa/agent.cpp b/source/lib/rocprofiler/hsa/agent.cpp new file mode 100644 index 0000000000..628aa692c3 --- /dev/null +++ b/source/lib/rocprofiler/hsa/agent.cpp @@ -0,0 +1,499 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "agent.hpp" + +#include +#include +#include + +#include "lib/common/utility.hpp" + +namespace fs = std::filesystem; + +namespace rocprofiler +{ +namespace hsa +{ +namespace +{ +std::unordered_map +get_gpu_nodes_near_cpu() +{ + std::unordered_map gpu_numa_nodes_near_cpu; + long long gpu_numa_nodes_start = 0; + + std::string path = "/sys/class/kfd/kfd/topology/nodes"; + for(const auto& entry : fs::directory_iterator(path)) + { + long long node_id = std::stoll(entry.path().filename().c_str()); + std::ifstream gpu_id_file; + std::string gpu_path = entry.path().c_str(); + gpu_path += "/gpu_id"; + gpu_id_file.open(gpu_path); + std::string gpu_id_str; + if(gpu_id_file.is_open()) + { + gpu_id_file >> gpu_id_str; + + if(!gpu_id_str.empty()) + { + auto gpu_id = std::stoll(gpu_id_str); + if(gpu_id > 0 && (gpu_numa_nodes_start > node_id || gpu_numa_nodes_start == 0)) + { + gpu_numa_nodes_start = node_id; + } + } + } + gpu_id_file.close(); + } + + path = "/sys/class/kfd/kfd/topology/nodes"; + for(const auto& entry : fs::directory_iterator(path)) + { + long long node_id = std::stoll(entry.path().filename().c_str()); + std::string numa_node_path = entry.path().c_str(); + long long agent_id = std::stoll(entry.path().filename().c_str()); + if(agent_id >= gpu_numa_nodes_start) + { + numa_node_path += "/io_links"; + for(const auto& numa_node_entry : fs::directory_iterator(numa_node_path)) + { + std::string numa_node_entry_properties_path = numa_node_entry.path().c_str(); + numa_node_entry_properties_path += "/properties"; + std::ifstream gpu_properties_file; + gpu_properties_file.open(numa_node_entry_properties_path); + std::string gpu_properties_file_line; + if(gpu_properties_file.is_open()) + { + while(gpu_properties_file) + { + std::getline(gpu_properties_file, gpu_properties_file_line); + std::string delimiter = " "; + std::stringstream ss(gpu_properties_file_line); + std::string word; + ss >> word; + if(word == "node_to") + { + ss >> word; + long long 
near_cpu_node_id = std::stoll(word); + if(near_cpu_node_id < gpu_numa_nodes_start) + { + gpu_numa_nodes_near_cpu[node_id] = near_cpu_node_id; + } + } + } + } + gpu_properties_file.close(); + } + } + } + return gpu_numa_nodes_near_cpu; +} + +// This function checks to see if the provided +// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true, +// the function adds an additional requirement that the pool have the +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false, +// pools must NOT have this property. +// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is +// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but +// no pool was found meeting the requirements. If an error is encountered, we +// return that error. +hsa_status_t +FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) +{ + if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT; + + auto [api_ptr, pool_ptr] = + *static_cast*>(data); + hsa_amd_segment_t segment; + LOG_IF(FATAL, + api_ptr->hsa_amd_memory_pool_get_info_fn( + pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) == HSA_STATUS_ERROR) + << "Could not get pool segment"; + if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS; + + uint32_t flag; + LOG_IF(FATAL, + api_ptr->hsa_amd_memory_pool_get_info_fn( + pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) == HSA_STATUS_ERROR) + << "Could not get flag value"; + uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; + if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) + { + return HSA_STATUS_SUCCESS; + } + *(pool_ptr) = pool; + return HSA_STATUS_INFO_BREAK; +} + +// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that +// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT +hsa_status_t +FindStandardPool(hsa_amd_memory_pool_t pool, void* data) +{ + return 
FindGlobalPool(pool, data, false); +} + +// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that +// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT +hsa_status_t +FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) +{ + return FindGlobalPool(pool, data, true); +} + +// Fills cpu_agent.cpu_pool (via FindStandardPool) and cpu_agent.kernarg_pool (via +// FindKernArgPool) by iterating the memory pools of the given non-GPU agent. Logs FATAL when an +// iteration returns anything other than HSA_STATUS_SUCCESS or HSA_STATUS_INFO_BREAK. +void +init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent) +{ + CHECK(!cpu_agent.isGpu()); + auto params = std::make_pair(&api, &cpu_agent.cpu_pool); + + auto status = + api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, &params); + LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + << "Error: Command Buffer Pool is not initialized"; + + // Re-target the output slot of the same (api, pool) pair at the kernarg pool + params.second = &cpu_agent.kernarg_pool; + status = + api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params)); + LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + << "Error: Output Buffer Pool is not initialized"; +} + +// Fills agent_info.gpu_pool (via FindStandardPool) by iterating the memory pools of the given GPU +// agent. Logs FATAL when the iteration returns anything other than HSA_STATUS_SUCCESS or +// HSA_STATUS_INFO_BREAK. +void +init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info) +{ + CHECK(agent_info.isGpu()); + auto params = std::make_pair(&api, &agent_info.gpu_pool); + auto status = + api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, &params); + + LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + << "Error: GPU Pool is not initialized"; +} + +} // namespace + +const std::vector& +all_agents() +{ + static std::shared_ptr> agents = AgentInfo::getAgents( + {.hsa_iterate_agents_fn = hsa_iterate_agents, .hsa_agent_get_info_fn = hsa_agent_get_info}, + {.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info, + .hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools, + .hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate, + .hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free, + .hsa_amd_agents_allow_access_fn = 
hsa_amd_agents_allow_access}); + return *agents; +} + +// Builds one AgentInfo per agent reported by api.hsa_iterate_agents_fn. Non-GPU agents get their +// NUMA node and their CPU/kernarg pools initialized; GPU agents get their driver node id, NUMA +// node and GPU pool. In a second pass every GPU agent is linked to the CPU agent that +// get_gpu_nodes_near_cpu() reports for its node and copies that agent's CPU/kernarg pools. +// Any failed query or lookup is reported through LOG_IF(FATAL, ...). +std::shared_ptr> +AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api) +{ + std::vector agents; + std::shared_ptr> agent_info_ptr = + std::make_shared>(); + auto& agent_info = *agent_info_ptr; + + api.hsa_iterate_agents_fn( + [](hsa_agent_t agent, void* data) { + CHECK_NOTNULL(static_cast*>(data))->emplace_back(agent); + return HSA_STATUS_SUCCESS; + }, + &agents); + + auto near_gpu_map = get_gpu_nodes_near_cpu(); + std::unordered_map cpu_id_to_agent; + + // Reserve is required to prevent reallocation (which breaks cpu_id_to_agent) + agent_info.reserve(agents.size()); + for(auto& agent : agents) + { + auto& new_agent = agent_info.emplace_back(agent, api); + if(!new_agent.isGpu()) + { + uint32_t cpu_numa_node_id; + LOG_IF(FATAL, + api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) != + HSA_STATUS_SUCCESS) + << "Could not fetch numa info"; + new_agent.setNumaNode(cpu_numa_node_id); + cpu_id_to_agent[cpu_numa_node_id] = &new_agent; + init_cpu_pool(ext_api, new_agent); + } + else if(new_agent.isGpu()) + { + uint32_t node_id; + LOG_IF(FATAL, + api.hsa_agent_get_info_fn( + agent, + static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), + &node_id) != HSA_STATUS_SUCCESS) + << "Could not fetch driver node id"; + new_agent.setIndex(node_id); + LOG_IF(FATAL, + api.hsa_agent_get_info_fn(agent, + static_cast(HSA_AGENT_INFO_NODE), + &node_id) != HSA_STATUS_SUCCESS) + << "Could not fetch numa node id"; + new_agent.setNumaNode(node_id); + init_gpu_pool(ext_api, new_agent); + } + } + + // Separate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the first + // NUMA nodes) + for(auto& agent : agent_info) + { + if(agent.isGpu()) + { + auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode()); + // exactly one replacement field: only the agent itself is passed to fmt::format + LOG_IF(FATAL, !near_gpu) << fmt::format("No CPU Agent near GPU Agent: {}", agent); + + auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu); + 
LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu); + agent.setNearCpuAgent((*id_to_agent)->getAgent()); + agent.cpu_pool = (*id_to_agent)->cpu_pool; + agent.kernarg_pool = (*id_to_agent)->kernarg_pool; + } + } + return agent_info_ptr; +} + +// Queries the agent's device type and name through table.hsa_agent_get_info_fn and derives +// gfxip_ from the name. For GPU agents only, it additionally reads the wavefront size, queue +// size, CU/SIMD/shader-engine counts and PCI information, and computes the per-SH and per-SIMD +// values from them. Logs FATAL when one of the checked queries fails. +AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table) +: handle_(agent.handle) +, agent_(agent) +{ + if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS) + { + LOG(FATAL) << "hsa_agent_get_info failed"; + } + + table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_); + + // gfxip_ is the name without its last two characters; names shorter than two characters yield + // an empty label instead of letting the unsigned subtraction wrap around + const size_t name_len = strlen(name_); + const size_t gfxip_label_len = + std::min(name_len >= 2 ? name_len - 2 : size_t{0}, sizeof(gfxip_) - 1); + memcpy(gfxip_, name_, gfxip_label_len); + gfxip_[gfxip_label_len] = '\0'; + + if(type_ != HSA_DEVICE_TYPE_GPU) + { + return; + } + + table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_); + table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_); + + table.hsa_agent_get_info_fn( + agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_); + + table.hsa_agent_get_info_fn( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_); + + table.hsa_agent_get_info_fn( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_); + + if(table.hsa_agent_get_info_fn(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE, + &shader_arrays_per_se_) != HSA_STATUS_SUCCESS || + table.hsa_agent_get_info_fn(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, + &waves_per_cu_) != HSA_STATUS_SUCCESS) + { + LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed"; + } + + // The CU, SIMD and shader-engine queries above are unchecked, so a divisor may still be zero; + // report 0 in that case instead of performing an integer division by zero + const uint32_t sh_count = se_num_ * shader_arrays_per_se_; + compute_units_per_sh_ = (sh_count != 0) ? cu_num_ / sh_count : 0; + wave_slots_per_simd_ = (simds_per_cu_ != 0) ? waves_per_cu_ / simds_per_cu_ : 0; + + if(table.hsa_agent_get_info_fn(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DOMAIN, + &pci_domain_) != HSA_STATUS_SUCCESS || + table.hsa_agent_get_info_fn(agent, + 
(hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID, + &pci_location_id_) != HSA_STATUS_SUCCESS) + { + LOG(FATAL) << "hsa_agent_get_info for PCI info failed"; + } +} + +uint64_t +AgentInfo::getIndex() const +{ + return index_; +} + +hsa_device_type_t +AgentInfo::getType() const +{ + return type_; +} + +uint64_t +AgentInfo::getHandle() const +{ + return handle_; +} + +const std::string_view +AgentInfo::getName() const +{ + return name_; +} + +std::string +AgentInfo::getGfxip() const +{ + return std::string(gfxip_); +} + +uint32_t +AgentInfo::getMaxWaveSize() const +{ + return max_wave_size_; +} + +uint32_t +AgentInfo::getMaxQueueSize() const +{ + return max_queue_size_; +} + +uint32_t +AgentInfo::getCUCount() const +{ + return cu_num_; +} + +uint32_t +AgentInfo::getSimdCountPerCU() const +{ + return simds_per_cu_; +} + +uint32_t +AgentInfo::getShaderEngineCount() const +{ + return se_num_; +} + +uint32_t +AgentInfo::getShaderArraysPerSE() const +{ + return shader_arrays_per_se_; +} + +uint32_t +AgentInfo::getMaxWavesPerCU() const +{ + return waves_per_cu_; +} + +uint32_t +AgentInfo::getCUCountPerSH() const +{ + return compute_units_per_sh_; +} + +uint32_t +AgentInfo::getWaveSlotsPerSimd() const +{ + return wave_slots_per_simd_; +} + +uint32_t +AgentInfo::getPCIDomain() const +{ + return pci_domain_; +} + +uint32_t +AgentInfo::getPCILocationID() const +{ + return pci_location_id_; +} + +uint32_t +AgentInfo::getXccCount() const +{ + return xcc_num_; +} + +void +AgentInfo::setIndex(uint64_t index) +{ + index_ = index; +} + +void +AgentInfo::setType(hsa_device_type_t type) +{ + type_ = type; +} + +void +AgentInfo::setHandle(uint64_t handle) +{ + handle_ = handle; +} + +void +AgentInfo::setName(const std::string& name) +{ + constexpr auto name_len = sizeof(name_) / sizeof(char); + // + // char* strncpy(char* destination, const char* source, size_t num) + // + // If the end of the source string (which is signaled by a null-character) is found before num + // characters have been 
copied, destination is padded with zeros until a total of num characters + // have been written to it + // + // Copy at most name_len - 1 characters: together with the terminator written below this + // rewrites every byte of name_, so no character of a previously stored, longer name survives + strncpy(name_, name.c_str(), name_len - 1); + // ensure always terminated + name_[name_len - 1] = '\0'; +} + +// Stores the NUMA node id associated with this agent. +void +AgentInfo::setNumaNode(uint32_t numa_node) +{ + numa_node_ = numa_node; +} + +// Returns the NUMA node id last stored with setNumaNode() (0 by default). +uint32_t +AgentInfo::getNumaNode() const +{ + return numa_node_; +} + +// Records the CPU agent considered nearest to this agent. +void +AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent) +{ + near_cpu_agent_ = near_cpu_agent; +} + +// Returns the agent last stored with setNearCpuAgent(). +hsa_agent_t +AgentInfo::getNearCpuAgent() +{ + return near_cpu_agent_; +} +} // namespace hsa +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/agent.hpp b/source/lib/rocprofiler/hsa/agent.hpp new file mode 100644 index 0000000000..ccecc9e896 --- /dev/null +++ b/source/lib/rocprofiler/hsa/agent.hpp @@ -0,0 +1,174 @@ +// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include +#include +#include + +#include "fmt/core.h" +#include "fmt/ranges.h" + +#include +#include +#include +#include + +#include "lib/common/utility.hpp" + +namespace rocprofiler +{ +namespace hsa +{ +static const uint32_t LDS_BLOCK_SIZE = 128 * 4; + +// Describes a single HSA agent: the properties read from the runtime by the two-argument +// constructor, plus the memory pools, NUMA node and nearest-CPU link that +// AgentInfo::getAgents() fills in afterwards. The class is move-only. +class AgentInfo +{ +public: + AgentInfo() = default; + AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table); + uint64_t getIndex() const; + hsa_device_type_t getType() const; + bool isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; } + bool isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; } + uint64_t getHandle() const; + const std::string_view getName() const; + // Pointer into this object's name buffer; valid only while the AgentInfo is alive + const char* getNameChar() const { return name_; } + std::string getGfxip() const; + uint32_t getMaxWaveSize() const; + uint32_t getMaxQueueSize() const; + uint32_t getCUCount() const; + uint32_t getSimdCountPerCU() const; + uint32_t getShaderEngineCount() const; + uint32_t getShaderArraysPerSE() const; + uint32_t getMaxWavesPerCU() const; + uint32_t getCUCountPerSH() const; + uint32_t getWaveSlotsPerSimd() const; + uint32_t getPCIDomain() const; + uint32_t getPCILocationID() const; + uint32_t getXccCount() const; + + void setIndex(uint64_t index); + void setType(hsa_device_type_t type); + void setHandle(uint64_t handle); + void setName(const std::string& name); + + void setNumaNode(uint32_t numa_node); + uint32_t getNumaNode() const; + + void setNearCpuAgent(hsa_agent_t near_cpu_agent); + hsa_agent_t getNearCpuAgent(); + hsa_agent_t getAgent() const { return agent_; } + + // Memory pools attached to this agent. Value-initialized (like the other members) so that an + // agent for which no matching pool was found does not carry an indeterminate handle. + hsa_amd_memory_pool_t cpu_pool = {}; + hsa_amd_memory_pool_t kernarg_pool = {}; + hsa_amd_memory_pool_t gpu_pool = {}; + + static std::shared_ptr> getAgents(const CoreApiTable&, + const AmdExtTable&); + + // Keep move constructors (i.e. 
std::move()) + AgentInfo(AgentInfo&& other) noexcept = default; + AgentInfo& operator=(AgentInfo&& other) noexcept = default; + + // Do not allow copying this class + AgentInfo(const AgentInfo&) = delete; + AgentInfo& operator=(const AgentInfo&) = delete; + +private: + uint64_t index_ = 0; + hsa_device_type_t type_ = HSA_DEVICE_TYPE_CPU; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 + uint64_t handle_ = 0; + char name_[64] = {'\0'}; + char gfxip_[64] = {'\0'}; + uint32_t max_wave_size_ = 0; + uint32_t max_queue_size_ = 0; + uint32_t cu_num_ = 0; + uint32_t simds_per_cu_ = 0; + uint32_t se_num_ = 0; + uint32_t shader_arrays_per_se_ = 0; + uint32_t waves_per_cu_ = 0; + // CUs per SH/SA + uint32_t compute_units_per_sh_ = 0; + uint32_t wave_slots_per_simd_ = 0; + // Number of XCCs on the GPU + uint32_t xcc_num_ = 0; + + uint32_t pci_domain_ = 0; + uint32_t pci_location_id_ = 0; + + uint32_t numa_node_ = 0; + hsa_agent_t near_cpu_agent_ = {}; + hsa_agent_t agent_ = {}; +}; + +const std::vector& +all_agents(); +} // namespace hsa +} // namespace rocprofiler + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const + { + auto device_type_name = [](auto dev) -> std::string_view { + switch(dev) + { + case HSA_DEVICE_TYPE_CPU: return "CPU"; + case HSA_DEVICE_TYPE_GPU: return "GPU"; + case HSA_DEVICE_TYPE_DSP: return "DSP"; + } + return "UNKNOWN"; + }; + + return fmt::format_to( + ctx.out(), + R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})", + agent.getIndex(), + device_type_name(agent.getType()), + agent.getHandle(), + agent.getName(), + 
agent.getGfxip(), + agent.getMaxWaveSize(), + agent.getMaxQueueSize(), + agent.getCUCount(), + agent.getSimdCountPerCU(), + agent.getShaderEngineCount(), + agent.getShaderArraysPerSE(), + agent.getMaxWavesPerCU(), + agent.getCUCountPerSH(), + agent.getWaveSlotsPerSimd(), + agent.getPCIDomain(), + agent.getPCILocationID(), + agent.getXccCount()); + } +}; +} // namespace fmt diff --git a/source/lib/rocprofiler/pc_sampling.cpp b/source/lib/rocprofiler/pc_sampling.cpp new file mode 100644 index 0000000000..fdd0619beb --- /dev/null +++ b/source/lib/rocprofiler/pc_sampling.cpp @@ -0,0 +1,60 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include + +#include "lib/rocprofiler/registration.hpp" + +namespace +{ +template +auto +consume_args(Tp&&...) 
+{} +} // namespace + +extern "C" { +rocprofiler_status_t +rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id, + rocprofiler_agent_t agent, + rocprofiler_pc_sampling_method_t method, + rocprofiler_pc_sampling_unit_t unit, + uint64_t interval, + rocprofiler_buffer_id_t buffer_id) +{ + if(rocprofiler::registration::get_init_status() > 0) + return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + + consume_args(context_id, agent, method, unit, interval, buffer_id); + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; +} + +rocprofiler_status_t +rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent, + rocprofiler_pc_sampling_configuration_t* config, + size_t* config_count) +{ + consume_args(agent, config, config_count); + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; +} +} diff --git a/source/lib/rocprofiler/rocprofiler.cpp b/source/lib/rocprofiler/rocprofiler.cpp index 4fabea84f2..ea0b767a28 100644 --- a/source/lib/rocprofiler/rocprofiler.cpp +++ b/source/lib/rocprofiler/rocprofiler.cpp @@ -26,6 +26,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler/context/context.hpp" #include "lib/rocprofiler/context/domain.hpp" +#include "lib/rocprofiler/hsa/agent.hpp" #include "lib/rocprofiler/hsa/hsa.hpp" #include "lib/rocprofiler/registration.hpp" @@ -56,70 +57,4 @@ rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts) *ts = rocprofiler::common::timestamp_ns(); return ROCPROFILER_STATUS_SUCCESS; } - -rocprofiler_status_t -rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback, - size_t agent_size, - void* user_data) -{ - using pc_sampling_config_vec_t = std::vector; - - static const auto _default_pc_config = - rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP, - ROCPROFILER_PC_SAMPLING_UNIT_TIME, - 1UL, - 1000000000UL, - 0}; - static const auto _dummy_pc_configs = pc_sampling_config_vec_t{_default_pc_config}; - - static auto _default_cpu_agent = 
rocprofiler_agent_t{rocprofiler_agent_id_t{0}, - ROCPROFILER_AGENT_TYPE_CPU, - "cpu", - rocprofiler_pc_sampling_config_array_t{}}; - static auto _default_gpu_agent = rocprofiler_agent_t{rocprofiler_agent_id_t{1}, - ROCPROFILER_AGENT_TYPE_GPU, - "gpu", - rocprofiler_pc_sampling_config_array_t{}}; - - // get the agents - auto _agents = std::vector{&_default_cpu_agent, &_default_gpu_agent}; - auto _pc_sampling_config = std::vector{}; - - for(auto* itr : _agents) - { - auto& _data = _pc_sampling_config.emplace_back(); - if(itr->type == ROCPROFILER_AGENT_TYPE_GPU) _data = {_default_pc_config}; - itr->pc_sampling_configs = - rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()}; - } - - assert(agent_size <= sizeof(rocprofiler_agent_t) && - "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in " - "rocprofiler"); - return callback(_agents.data(), _agents.size(), user_data); -} - -rocprofiler_status_t -rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id, - rocprofiler_agent_t agent, - rocprofiler_pc_sampling_method_t method, - rocprofiler_pc_sampling_unit_t unit, - uint64_t interval, - rocprofiler_buffer_id_t buffer_id) -{ - if(rocprofiler::registration::get_init_status() > 0) - return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; - - consume_args(context_id, agent, method, unit, interval, buffer_id); - return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; -} - -rocprofiler_status_t -rocprofiler_query_pc_sampling_agent_configurations(rocprofiler_agent_t agent, - rocprofiler_pc_sampling_configuration_t* config, - size_t* config_count) -{ - consume_args(agent, config, config_count); - return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; -} }