From 1bf4cb0893aecdc812ece2637ddee58654359ffd Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 10 Oct 2023 18:10:23 -0500 Subject: [PATCH] Agent information w/o using hsa-runtime (#100) * Agent information w/o using hsa-runtime - remove lib/rocprofiler/hsa/agent.{hpp,cpp} - update include/rocprofiler/agent.h - basically all possible info from /sys/class/kfd/kfd/topology/nodes/* * Print topology in rocprofiler_lib.agent test - hack to help diagnose errors * Update lib/rocprofiler/tests/details/agent.cpp - use LOG_IF(WARNING, ...) instead of LOG_IF(FATAL, ...) * Update lib/rocprofiler/tests/agent.cpp - print rocminfo at beginning of test - fix comparison of agent handle - misc other checks * Update lib/rocprofiler/agent.cpp - handle unreadable /sys/class/kfd/kfd/topology/nodes//properties file * Update lib/tests/buffering/CMakeLists.txt - increase timeout to 120 - buffering.parallel will timeout when thread sanitizing is enabled * Update cmake: rocprofiler-drm - find drm headers and libraries * Update include/rocprofiler/agent.h - add family_id field * Update lib/rocprofiler/agent.cpp - parse /proc/cpuinfo for name, family, apicid, etc. 
- read_topology uses unique pointers to cleanup memory allocations - implement name and gfxip * Update lib/rocprofiler/tests/agent.cpp - improved failure message - check name/gfxip - remove check against hsa_agent_t.handle - this value is dependent on the address of C++ class * Update lib/rocprofiler/tests/details/agent.cpp - tweak gfxip_ variable which is broken for CPU * Update lib/rocprofiler/agent.cpp - update string handling for name and gfxip * Update lib/rocprofiler/tests/agent.cpp - minor output tweak * Update lib/rocprofiler/registration.{hpp,cpp} - registration::init_logging() function * Update lib/rocprofiler/agent.cpp - fix hex handling of GFX step version * Update lib/rocprofiler/tests/details/agent.cpp - fix format string when nearest CPUs not found * Update lib/rocprofiler/tests/CMakeLists.txt - exclude details/agent.cpp from being parsed for gtest tests * Update include/rocprofiler/fwd.h - add ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI status * Update lib/rocprofiler/tests/details/agent.{hpp,cpp} - replace with slightly modified implementation of rocminfo - primary change was not printing * Update lib/rocprofiler/tests/agent.cpp - update test to use rocminfo data * Update lib/rocprofiler/agent.cpp - add pc_sampling_configs - return error on incompatible ABI * Update counters and counters tests - rename test names for consistency - fixed incorrect spelling of derived * Add lib/rocprofiler/tests/{timestamp,version}.cpp - add timestamp and version tests for rocprofiler_get_timestamp and rocprofiler_get_version, respectively * Update lib/rocprofiler/tests/agent - fix double free of name_str from isa_info_t * Update include/rocprofiler/agent.h - comments for rocprofiler_agent_mem_bank_t - add rocprofiler_dim3_t - comments for rocprofiler_agent_t - add new fields to rocprofiler_agent_t - cu_count - workgroup_max_size - workgroup_max_dim - grid_max_size - grid_max_dim - vendor_name - product_name - change prototype of rocprofiler_available_agents_cb_t to be 
const agent** * Update lib/rocprofiler/agent.cpp - set size field - implement: - product_name - vendor_name - workgroup_max_size - workgroup_max_dim - grid_max_size - grid_max_dim - cu_count * Update lib/rocprofiler/tests/agent.cpp - changes for const agent* * Update samples/pc_sampling - updates for const agent* * Update lib/rocprofiler/agent.cpp - fix ABI compatibility check - return incompatible if tool agent is larger than our agent * Update include/rocprofiler/agent.h - doxygen comments - make size field of rocprofiler_agent_t uint64_t for consistency - add gpu_id via /sys/class/kfd/kfd/...//gpu_id - add model_name via /sys/class/kfd/kfd/...//name * Update lib/rocprofiler/agent.cpp - add read_file function (vector of strings) - support enum in read_property - assign model_name and gpu_id fields - remove unique_id * Update lib/rocprofiler/tests/details/agent.* - support family id, ucode_version, sdma_ucode_version * Update lib/rocprofiler/tests/agent.cpp - Add rocprofiler_lib.agent_abi test - Verify family_id, ucode_version, sdma_ucode_version [ROCm/rocprofiler-sdk commit: a798a26227376e43009168c239d8864ff598ab4d] --- .../cmake/rocprofiler_config_interfaces.cmake | 36 ++ .../cmake/rocprofiler_interfaces.cmake | 1 + .../samples/pc_sampling/common.h | 2 +- .../single-user-multiple-agents.cpp | 6 +- .../source/include/rocprofiler/agent.h | 152 ++++- .../source/include/rocprofiler/fwd.h | 2 + .../source/lib/common/CMakeLists.txt | 3 +- .../source/lib/rocprofiler/agent.cpp | 583 +++++++++++++++++- .../lib/rocprofiler/counters/metrics.cpp | 2 +- .../lib/rocprofiler/counters/metrics.hpp | 4 +- .../counters/tests/metrics_test.cpp | 14 +- .../source/lib/rocprofiler/hsa/CMakeLists.txt | 4 +- .../source/lib/rocprofiler/hsa/agent.cpp | 499 --------------- .../source/lib/rocprofiler/hsa/agent.hpp | 174 ------ .../source/lib/rocprofiler/registration.cpp | 44 +- .../source/lib/rocprofiler/registration.hpp | 4 + .../source/lib/rocprofiler/rocprofiler.cpp | 16 - 
.../lib/rocprofiler/tests/CMakeLists.txt | 4 +- .../source/lib/rocprofiler/tests/agent.cpp | 217 +++++++ .../lib/rocprofiler/tests/details/agent.cpp | 478 ++++++++++++++ .../lib/rocprofiler/tests/details/agent.hpp | 149 +++++ .../lib/rocprofiler/tests/timestamp.cpp | 41 ++ .../source/lib/rocprofiler/tests/version.cpp | 53 ++ .../source/lib/tests/buffering/CMakeLists.txt | 2 +- 24 files changed, 1718 insertions(+), 772 deletions(-) delete mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent.cpp delete mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent.hpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.cpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.hpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/tests/timestamp.cpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler/tests/version.cpp diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake index ea6b0c8f8d..80181221eb 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_config_interfaces.cmake @@ -168,3 +168,39 @@ find_library( PATHS ${rocm_version_DIR} ${ROCM_PATH}) target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library}) + +# ----------------------------------------------------------------------------------------# +# +# drm +# +# ----------------------------------------------------------------------------------------# + +find_path( + drm_INCLUDE_DIR + NAMES drm.h + HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATH_SUFFIXES include/drm include REQUIRED) + +find_path( + xf86drm_INCLUDE_DIR + NAMES xf86drm.h + HINTS 
${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATH_SUFFIXES include/drm include REQUIRED) + +find_library( + drm_LIBRARY + NAMES drm + HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED) + +find_library( + drm_amdgpu_LIBRARY + NAMES drm_amdgpu + HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu + PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED) + +target_include_directories(rocprofiler-drm SYSTEM INTERFACE ${drm_INCLUDE_DIR} + ${xf86drm_INCLUDE_DIR}) +target_link_libraries(rocprofiler-drm INTERFACE ${drm_LIBRARY} ${drm_amdgpu_LIBRARY}) diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake index 0a777cf05e..a9fc11cdec 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_interfaces.cmake @@ -51,3 +51,4 @@ rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" IN rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL) +rocprofiler_add_interface_library(rocprofiler-drm "drm (amdgpu) library" INTERNAL) diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/common.h b/projects/rocprofiler-sdk/samples/pc_sampling/common.h index 61b32dde88..41ac6f7766 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/common.h +++ b/projects/rocprofiler-sdk/samples/pc_sampling/common.h @@ -37,7 +37,7 @@ const std::string_view MI200_NAME = "gfx90a"; } static rocprofiler_status_t -find_first_gpu_agent_impl(rocprofiler_agent_t** agents, size_t num_agents, void* data) +find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, void* data) { // data is required if(!data) 
return ROCPROFILER_STATUS_ERROR; diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp index 4a323563ef..c966d34eeb 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp +++ b/projects/rocprofiler-sdk/samples/pc_sampling/single-user-multiple-agents.cpp @@ -18,9 +18,9 @@ std::vector contexts; std::vector buffer_ids; rocprofiler_status_t -find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_t** agents, - size_t num_agents, - void* data) +find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agents, + size_t num_agents, + void* data) { // data is required if(!data) return ROCPROFILER_STATUS_ERROR; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h index c54c665e9a..fe28a7492a 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/agent.h @@ -25,6 +25,10 @@ #include #include +#include + +#include + ROCPROFILER_EXTERN_C_INIT /** @@ -34,14 +38,147 @@ ROCPROFILER_EXTERN_C_INIT * @{ */ +/** + * @brief Cache information for an agent. + */ +typedef struct rocprofiler_agent_cache_t +{ + uint64_t processor_id_low; ///< Identifies the processor number + uint64_t size; ///< Size of the cache + uint32_t level; ///< Integer representing level: 1, 2, 3, 4, etc. + uint32_t cache_line_size; ///< Cache line size in bytes + uint32_t cache_lines_per_tag; ///< Cache lines per Cache Tag + uint32_t association; ///< Cache Associativity + uint32_t latency; ///< Cache latency in ns + HsaCacheType type; +} rocprofiler_agent_cache_t; + +/** + * @brief IO link information for an agent. 
+ */ +typedef struct rocprofiler_agent_io_link_t +{ + HSA_IOLINKTYPE type; ///< Discoverable IoLink Properties (optional) + uint32_t version_major; ///< Bus interface version (optional) + uint32_t version_minor; ///< Bus interface version (optional) + uint32_t node_from; ///< See @ref rocprofiler_agent_id_t + uint32_t node_to; ///< See @ref rocprofiler_agent_id_t + uint32_t weight; ///< weight factor (derived from CDIT) + uint32_t min_latency; ///< minimum cost of time to transfer (rounded to ns) + uint32_t max_latency; ///< maximum cost of time to transfer (rounded to ns) + uint32_t min_bandwidth; ///< minimum interface Bandwidth in MB/s + uint32_t max_bandwidth; ///< maximum interface Bandwidth in MB/s + uint32_t recommended_transfer_size; ///< recommended transfer size to reach maximum bandwidth + ///< in bytes + HSA_LINKPROPERTY flags; ///< override flags (may be active for specific platforms) +} rocprofiler_agent_io_link_t; + +/** + * @brief Memory bank information for an agent. + */ +typedef struct rocprofiler_agent_mem_bank_t +{ + HSA_HEAPTYPE heap_type; + HSA_MEMORYPROPERTY flags; + uint32_t width; ///< the number of parallel bits of the memoryinterface + uint32_t mem_clk_max; ///< clock for the memory, this allows computing the available + ///< bandwidth to the memory when needed + uint64_t size_in_bytes; ///< physical memory size of the memory range in bytes +} rocprofiler_agent_mem_bank_t; + +/** + * @brief Multi-dimensional struct of data + */ +typedef struct rocprofiler_dim3_t +{ + uint32_t x; + uint32_t y; + uint32_t z; +} rocprofiler_dim3_t; + /** * @brief Agent. */ -typedef struct +typedef struct rocprofiler_agent_t { - rocprofiler_agent_id_t id; - rocprofiler_agent_type_t type; - const char* name; + uint64_t size; ///< set to sizeof(rocprofiler_agent_t) by rocprofiler. 
This can be used for + ///< versioning and compatibility handling + rocprofiler_agent_id_t id; ///< Internal opaque identifier + rocprofiler_agent_type_t type; ///< Enumeration for identifying the agent type (CPU, GPU, etc.) + uint32_t cpu_cores_count; ///< # of latency (= CPU) cores present on this HSA node. This value + ///< is 0 for a HSA node with no such cores, e.g. a "discrete HSA GPU" + uint32_t simd_count; ///< # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a + ///< node. This value is 0 if no FCompute cores are present (e.g. pure + ///< "CPU node"). + uint32_t mem_banks_count; ///< # of discoverable memory bank affinity properties on this + ///< "H-NUMA" node. + uint32_t caches_count; ///< # of discoverable cache affinity properties on this "H-NUMA" node. + uint32_t io_links_count; ///< # of discoverable IO link affinity properties of this node + ///< connecting to other nodes. + uint32_t cpu_core_id_base; ///< low value of the logical processor ID of the latency (= CPU) + ///< cores available on this node + uint32_t simd_id_base; ///< low value of the logical processor ID of the throughput (= GPU) + ///< units available on this node + uint32_t max_waves_per_simd; ///< This identifies the max. number of launched waves per SIMD. + ///< If NumFComputeCores is 0, this value is ignored. 
+ uint32_t lds_size_in_kb; ///< Size of Local Data Store in Kilobytes per SIMD Wavefront + uint32_t gds_size_in_kb; ///< Size of Global Data Store in Kilobytes shared across SIMD + ///< Wavefronts + uint32_t num_gws; ///< Number of GWS barriers + uint32_t wave_front_size; ///< Number of SIMD cores per wavefront executed, typically 64, may + ///< be 32 or a different value for some HSA based architectures + uint32_t num_xcc; ///< Number of XCC + uint32_t cu_count; ///< Number of compute units + uint32_t array_count; ///< Number of SIMD arrays + uint32_t num_shader_banks; ///< Number of Shader Banks or Shader Engines, typical values are 1 + ///< or 2 + uint32_t simd_arrays_per_engine; ///< Number of SIMD arrays per engine + uint32_t cu_per_simd_array; ///< Number of Compute Units (CU) per SIMD array + uint32_t simd_per_cu; ///< Number of SIMD representing a Compute Unit (CU) + uint32_t max_slots_scratch_cu; ///< Number of temp. memory ("scratch") wave slots available to + ///< access, may be 0 if HW has no restrictions + uint32_t gfx_target_version; ///< major_version=((value / 10000) % 100) + ///< minor_version=((value / 100) % 100) + ///< patch_version=(value % 100) + uint16_t vendor_id; ///< GPU vendor id; 0 on latency (= CPU)-only nodes + uint16_t device_id; ///< GPU device id; 0 on latency (= CPU)-only nodes + uint32_t location_id; ///< GPU BDF (Bus/Device/function number) - identifies the device + ///< location in the overall system + uint32_t domain; ///< PCI domain of the GPU + uint32_t drm_render_minor; ///< DRM render device minor device number + uint32_t num_sdma_engines; ///< number of PCIe optimized SDMA engines + uint32_t num_sdma_xgmi_engines; ///< number of XGMI optimized SDMA engines + uint32_t num_sdma_queues_per_engine; ///< number of SDMA queue per one engine + uint32_t num_cp_queues; ///< number of Compute queues + uint32_t max_engine_clk_ccompute; ///< maximum engine clocks for CPU, including any boost + ///< capabilities + uint32_t 
max_engine_clk_fcompute; ///< GPU only. Maximum engine clocks for GPU, including any + ///< boost capabilities + HSA_ENGINE_VERSION sdma_fw_version; ///< GPU only + HSA_ENGINE_ID + fw_version; ///< GPU only. Identifier (rev) of the GPU uEngine or Firmware, may be 0 + HSA_CAPABILITY capability; ///< GPU only + uint32_t cu_per_engine; ///< computed + uint32_t max_waves_per_cu; ///< computed + uint32_t family_id; ///< Family code + uint32_t workgroup_max_size; ///< GPU only. Maximum total number of work-items in a work-group. + uint32_t grid_max_size; ///< GPU only. Maximum total number of work-items in a grid + ///< (all dimensions combined). + uint64_t local_mem_size; ///< GPU only. Local memory size + uint64_t hive_id; ///< XGMI Hive the GPU node belongs to in the system. It is an opaque and + ///< static number hash created by the PSP + uint64_t gpu_id; ///< GPU only. KFD identifier + rocprofiler_dim3_t workgroup_max_dim; ///< GPU only. Maximum number of work-items of each + ///< dimension of a work-group. + rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension + ///< of a grid. + rocprofiler_agent_mem_bank_t* mem_banks; + rocprofiler_agent_cache_t* caches; + rocprofiler_agent_io_link_t* io_links; + const char* name; ///< Name of the agent. Will be identical to product name for CPU + const char* vendor_name; ///< Vendor of agent (will be AMD) + const char* product_name; ///< Marketing name + const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc. 
rocprofiler_pc_sampling_config_array_t pc_sampling_configs; } rocprofiler_agent_t; @@ -53,9 +190,10 @@ typedef struct * @param [in] user_data Data pointer passback * @return ::rocprofiler_status_t */ -typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(rocprofiler_agent_t** agents, - size_t num_agents, - void* user_data); +typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)( + const rocprofiler_agent_t** agents, + size_t num_agents, + void* user_data); /** * @brief Receive synchronous callback with an array of available agents at moment of invocation diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h index e5ab4c0816..237a38eedc 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler/fwd.h @@ -67,6 +67,8 @@ typedef enum // NOLINT(performance-enum-size) ///< rocprofiler configuration (i.e. ///< function called post-initialization) ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, ///< Function is not implemented + ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, ///< Data structure provided by user is incompatible + ///< with current version of rocprofiler ROCPROFILER_STATUS_LAST, } rocprofiler_status_t; diff --git a/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt index ce7b3a382e..c152619de0 100644 --- a/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt @@ -30,6 +30,7 @@ target_link_libraries( $ $ $ - $) + $ + $) set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME rocprofiler-common) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp index c0397bea48..ba9f478994 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp +++ 
b/projects/rocprofiler-sdk/source/lib/rocprofiler/agent.cpp @@ -24,55 +24,570 @@ #include #include -#include "lib/rocprofiler/hsa/agent.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +namespace rocprofiler +{ +namespace agent +{ +namespace +{ +namespace fs = ::std::filesystem; + +struct cpu_info +{ + long processor = -1; + long family = -1; + long model = -1; + long physical_id = -1; + long core_id = -1; + long apicid = -1; + std::string vendor_id = {}; + std::string model_name = {}; + + bool is_valid() const + { + return !(processor < 0 || family < 0 || model < 0 || physical_id < 0 || core_id < 0 || + apicid < 0 || vendor_id.empty() || model_name.empty()); + } +}; + +auto +parse_cpu_info() +{ + auto ifs = std::ifstream{"/proc/cpuinfo"}; + auto data = std::vector{}; + if(!ifs) return data; + + auto read_blocks = [&ifs]() { + auto blocks = std::vector>{}; + auto current_block = std::vector{}; + auto line = std::string{}; + while(std::getline(ifs, line)) + { + if(ifs.eof()) + { + if(!current_block.empty()) blocks.emplace_back(std::move(current_block)); + break; + } + + if(line.empty()) + { + if(!current_block.empty()) blocks.emplace_back(std::move(current_block)); + current_block.clear(); + } + else + { + current_block.emplace_back(line); + } + } + return blocks; + }; + + auto processor_blocks = read_blocks(); + auto processor_info = std::vector{}; + processor_info.reserve(processor_blocks.size()); + + for(const auto& bitr : processor_blocks) + { + auto info_v = cpu_info{}; + for(const auto& itr : bitr) + { + auto match = std::smatch{}; + const std::regex re{".*: (.*)$"}; + if(std::regex_match(itr, match, re)) + { + if(match.size() == 2) + { + std::ssub_match value = match[1]; + + if(itr.find("vendor_id") == 0) + info_v.vendor_id = value.str(); + else if(itr.find("model name") == 0) + info_v.model_name = value.str(); + else if(itr.find("processor") == 0) + info_v.processor = 
std::stol(value.str()); + else if(itr.find("cpu family") == 0) + info_v.family = std::stol(value.str()); + else if(itr.find("model") == 0 && itr.find("model name") != 0) + info_v.model = std::stol(value.str()); + else if(itr.find("physical id") == 0) + info_v.physical_id = std::stol(value.str()); + else if(itr.find("core id") == 0) + info_v.core_id = std::stol(value.str()); + else if(itr.find("apicid") == 0) + info_v.apicid = std::stol(value.str()); + } + } + } + if(info_v.is_valid()) + processor_info.emplace_back(info_v); + else + { + LOG(ERROR) << "Invalid processor info: " + << fmt::format("processor={}, vendor={}, family={}, model={}, name={}, " + "physical id={}, core id={}, apicid={}", + info_v.processor, + info_v.vendor_id, + info_v.family, + info_v.model, + info_v.model_name, + info_v.physical_id, + info_v.core_id, + info_v.apicid); + } + } + + return processor_info; +} + +auto& +get_cpu_info() +{ + static auto _v = parse_cpu_info(); + return _v; +} + +auto +read_file(const std::string& fname) +{ + auto data = std::vector{}; + auto ifs = std::ifstream{fname}; + if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + while(true) + { + auto value = std::string{}; + ifs >> value; + if(ifs.eof()) break; + + if(!value.empty()) data.emplace_back(value); + } + + return data; +} + +auto +read_map(const std::string& fname) +{ + auto data = std::unordered_map{}; + auto ifs = std::ifstream{fname}; + if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)}; + + while(true) + { + auto label = std::string{}; + ifs >> label; + if(ifs.eof()) break; + + auto entry = std::string{}; + ifs >> entry; + if(ifs.eof()) + throw std::runtime_error{ + fmt::format("unexpected file format in '{}' at {}", fname, label)}; + + auto ret = data.emplace(label, entry); + if(!ret.second) + throw std::runtime_error{fmt::format("duplicate entry in '{}': {}", fname, label)}; + } + + return data; +} + +template +void +read_property(const 
MapT& data, const std::string& label, Tp& value) +{ + if constexpr(std::is_enum::value) + { + using value_type = std::underlying_type_t; + // never expect this to be true but it does guard against infinite recursion + static_assert(!std::is_enum::value, "Expected non-enum type"); + + auto value_v = static_cast(value); + read_property(data, label, value_v); + value = static_cast(value_v); + } + else + { + static_assert(std::is_integral::value, "Expected integral type"); + using value_type = std::conditional_t::value, intmax_t, uintmax_t>; + + if(data.find(label) == data.end()) + { + LOG(ERROR) << "agent properties map missing " << label << " entry"; + return; + } + + auto iss = std::istringstream{data.at(label)}; + value_type local_value; + iss >> local_value; + + // verify that we have used the correct data sizes + constexpr auto min_value = std::numeric_limits::min(); + constexpr auto max_value = std::numeric_limits::max(); + if(local_value < min_value) + { + throw std::runtime_error{ + fmt::format("data with label {} has a value (={}) which is less " + "than the min value for the type (={})", + label, + local_value, + min_value)}; + } + else if(local_value > max_value) + { + throw std::runtime_error{fmt::format("data with label {} has a value (={}) which is " + "greater " + "than the max value for the type (={})", + label, + local_value, + max_value)}; + } + + value = static_cast(local_value); + } +} + +constexpr auto +compute_version(uint32_t major_v, uint32_t minor_v, uint32_t patch_v) +{ + return (major_v * 10000) + (minor_v * 100) + patch_v; +} + +auto +read_topology() +{ + using unique_agent_t = std::unique_ptr; + + auto sysfs_nodes_path = fs::path{"/sys/class/kfd/kfd/topology/nodes/"}; + if(!fs::exists(sysfs_nodes_path)) + throw std::runtime_error{ + fmt::format("sysfs nodes path '{}' does not exist", sysfs_nodes_path.string())}; + + using pc_sampling_config_vec_t = std::vector; + + auto mi200_pc_sampling_config = pc_sampling_config_vec_t{ + 
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP, + ROCPROFILER_PC_SAMPLING_UNIT_TIME, + 1UL, + 1000000000UL, + 0}}; + + const auto& cpu_info_v = get_cpu_info(); + auto data = std::vector{}; + uint64_t n = 0; + + while(true) + { + auto idx = n++; + auto node_path = sysfs_nodes_path / std::to_string(idx); + if(!fs::exists(node_path)) break; + + auto properties = std::unordered_map{}; + auto name_prop = std::vector{}; + auto gpu_id_prop = std::vector{}; + try + { + properties = read_map(node_path / "properties"); + name_prop = read_file(node_path / "name"); + gpu_id_prop = read_file(node_path / "gpu_id"); + } catch(std::runtime_error& e) + { + LOG(ERROR) << "Error reading '" << (node_path / "properties").string() + << "' :: " << e.what(); + continue; + } + + auto agent_info = rocprofiler_agent_t{}; + memset(&agent_info, 0, sizeof(agent_info)); + + agent_info.size = sizeof(rocprofiler_agent_t); + agent_info.id.handle = idx; + agent_info.type = ROCPROFILER_AGENT_TYPE_NONE; + + if(!name_prop.empty()) + agent_info.model_name = strdup(name_prop.front().c_str()); + else + agent_info.model_name = ""; + + if(!gpu_id_prop.empty()) agent_info.gpu_id = std::stoull(gpu_id_prop.front()); + + read_property(properties, "cpu_cores_count", agent_info.cpu_cores_count); + read_property(properties, "simd_count", agent_info.simd_count); + + if(agent_info.cpu_cores_count > 0) + agent_info.type = ROCPROFILER_AGENT_TYPE_CPU; + else if(agent_info.simd_count > 0) + agent_info.type = ROCPROFILER_AGENT_TYPE_GPU; + + read_property(properties, "mem_banks_count", agent_info.mem_banks_count); + read_property(properties, "caches_count", agent_info.caches_count); + read_property(properties, "io_links_count", agent_info.io_links_count); + read_property(properties, "cpu_core_id_base", agent_info.cpu_core_id_base); + read_property(properties, "simd_id_base", agent_info.simd_id_base); + read_property(properties, "max_waves_per_simd", agent_info.max_waves_per_simd); + 
read_property(properties, "lds_size_in_kb", agent_info.lds_size_in_kb); + read_property(properties, "gds_size_in_kb", agent_info.gds_size_in_kb); + read_property(properties, "num_gws", agent_info.num_gws); + read_property(properties, "wave_front_size", agent_info.wave_front_size); + read_property(properties, "array_count", agent_info.array_count); + read_property(properties, "simd_arrays_per_engine", agent_info.simd_arrays_per_engine); + read_property(properties, "cu_per_simd_array", agent_info.cu_per_simd_array); + read_property(properties, "simd_per_cu", agent_info.simd_per_cu); + read_property(properties, "max_slots_scratch_cu", agent_info.max_slots_scratch_cu); + read_property(properties, "gfx_target_version", agent_info.gfx_target_version); + read_property(properties, "vendor_id", agent_info.vendor_id); + read_property(properties, "device_id", agent_info.device_id); + read_property(properties, "location_id", agent_info.location_id); + read_property(properties, "domain", agent_info.domain); + read_property(properties, "drm_render_minor", agent_info.drm_render_minor); + read_property(properties, "hive_id", agent_info.hive_id); + read_property(properties, "num_sdma_engines", agent_info.num_sdma_engines); + read_property(properties, "num_sdma_xgmi_engines", agent_info.num_sdma_xgmi_engines); + read_property( + properties, "num_sdma_queues_per_engine", agent_info.num_sdma_queues_per_engine); + read_property(properties, "num_cp_queues", agent_info.num_cp_queues); + read_property(properties, "max_engine_clk_ccompute", agent_info.max_engine_clk_ccompute); + + agent_info.name = ""; + agent_info.product_name = ""; + agent_info.vendor_name = ""; + if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU) + { + constexpr auto workgrp_max = 1024; + constexpr auto grid_max = std::numeric_limits::max(); + + read_property( + properties, "max_engine_clk_fcompute", agent_info.max_engine_clk_fcompute); + read_property(properties, "local_mem_size", agent_info.local_mem_size); + 
read_property(properties, "fw_version", agent_info.fw_version.Value); + read_property(properties, "capability", agent_info.capability.Value); + read_property(properties, "sdma_fw_version", agent_info.sdma_fw_version.Value); + agent_info.fw_version.Value &= 0x3ff; + agent_info.sdma_fw_version.Value &= 0x3ff; + agent_info.workgroup_max_size = workgrp_max; // hardcoded in hsa-runtime + agent_info.workgroup_max_dim = {workgrp_max, workgrp_max, workgrp_max}; + agent_info.grid_max_size = grid_max; // hardcoded in hsa-runtime + agent_info.grid_max_dim = {grid_max, grid_max, grid_max}; + agent_info.cu_count = agent_info.simd_count / agent_info.simd_per_cu; + + if(int drm_fd = 0; (drm_fd = drmOpenRender(agent_info.drm_render_minor)) >= 0) + { + uint32_t major_version = 0; + uint32_t minor_version = 0; + auto* device_handle = amdgpu_device_handle{}; + if(amdgpu_device_initialize( + drm_fd, &major_version, &minor_version, &device_handle) == 0) + { + auto major = (agent_info.gfx_target_version / 10000) % 100; + auto minor = (agent_info.gfx_target_version / 100) % 100; + auto step = (agent_info.gfx_target_version % 100); + + agent_info.name = + strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str()); + agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle)); + agent_info.vendor_name = strdup("AMD"); + + amdgpu_gpu_info gpu_info = {}; + if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0) + { + agent_info.family_id = gpu_info.family_id; + } + amdgpu_device_deinitialize(device_handle); + } + drmClose(drm_fd); + } + + constexpr auto gfx90a_version = compute_version(9, 0, 10); + + if(agent_info.gfx_target_version >= gfx90a_version) + { + agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{ + mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()}; + } + } + else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU) + { + agent_info.cu_count = agent_info.cpu_cores_count; + agent_info.vendor_name = strdup("CPU"); + for(const 
auto& itr : cpu_info_v) + { + if(agent_info.cpu_core_id_base == itr.apicid) + { + agent_info.name = strdup(itr.model_name.c_str()); + agent_info.product_name = strdup(agent_info.name); + agent_info.family_id = itr.family; + break; + } + } + } + + if(properties.count("num_xcc") > 0) + read_property(properties, "num_xcc", agent_info.num_xcc); + else + agent_info.num_xcc = 1; + + agent_info.max_waves_per_cu = agent_info.simd_per_cu * agent_info.max_waves_per_simd; + + if(agent_info.simd_arrays_per_engine > 0) + { + agent_info.num_shader_banks = + agent_info.array_count / agent_info.simd_arrays_per_engine; + + // depends on above + if(agent_info.num_shader_banks * agent_info.simd_arrays_per_engine > 0) + { + agent_info.cu_per_engine = + (agent_info.simd_count / agent_info.simd_per_cu) / + (agent_info.num_shader_banks * agent_info.simd_arrays_per_engine); + } + } + + agent_info.mem_banks = nullptr; + agent_info.caches = nullptr; + agent_info.io_links = nullptr; + + if(agent_info.mem_banks_count > 0) + { + agent_info.mem_banks = new rocprofiler_agent_mem_bank_t[agent_info.mem_banks_count]; + + for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i) + { + using heap_type_t = HSA_HEAPTYPE; + using underlying_heap_type_t = std::underlying_type_t; + + auto subproperties = + read_map(node_path / "mem_banks" / std::to_string(i) / "properties"); + + auto _heap_type = underlying_heap_type_t{}; + read_property(subproperties, "heap_type", _heap_type); + agent_info.mem_banks[i].heap_type = static_cast(_heap_type); + + read_property( + subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes); + read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty); + read_property(subproperties, "width", agent_info.mem_banks[i].width); + read_property(subproperties, "mem_clk_max", agent_info.mem_banks[i].mem_clk_max); + } + } + + if(agent_info.caches_count > 0) + { + agent_info.caches = new rocprofiler_agent_cache_t[agent_info.caches_count]; + + 
for(uint32_t i = 0; i < agent_info.caches_count; ++i) + { + auto subproperties = + read_map(node_path / "caches" / std::to_string(i) / "properties"); + + read_property( + subproperties, "processor_id_low", agent_info.caches[i].processor_id_low); + read_property(subproperties, "level", agent_info.caches[i].level); + read_property(subproperties, "size", agent_info.caches[i].size); + read_property( + subproperties, "cache_line_size", agent_info.caches[i].cache_line_size); + read_property( + subproperties, "cache_lines_per_tag", agent_info.caches[i].cache_lines_per_tag); + read_property(subproperties, "association", agent_info.caches[i].association); + read_property(subproperties, "latency", agent_info.caches[i].latency); + read_property(subproperties, "type", agent_info.caches[i].type.Value); + } + } + + if(agent_info.io_links_count > 0) + { + agent_info.io_links = new rocprofiler_agent_io_link_t[agent_info.io_links_count]; + + for(uint32_t i = 0; i < agent_info.io_links_count; ++i) + { + auto subproperties = + read_map(node_path / "io_links" / std::to_string(i) / "properties"); + + read_property(subproperties, "type", agent_info.io_links[i].type); + read_property(subproperties, "version_major", agent_info.io_links[i].version_major); + read_property(subproperties, "version_minor", agent_info.io_links[i].version_minor); + read_property(subproperties, "node_from", agent_info.io_links[i].node_from); + read_property(subproperties, "node_to", agent_info.io_links[i].node_to); + read_property(subproperties, "weight", agent_info.io_links[i].weight); + read_property(subproperties, "min_latency", agent_info.io_links[i].min_latency); + read_property(subproperties, "max_latency", agent_info.io_links[i].max_latency); + read_property(subproperties, "min_bandwidth", agent_info.io_links[i].min_bandwidth); + read_property(subproperties, "max_bandwidth", agent_info.io_links[i].max_bandwidth); + read_property(subproperties, + "recommended_transfer_size", + 
agent_info.io_links[i].recommended_transfer_size); + read_property(subproperties, "flags", agent_info.io_links[i].flags.LinkProperty); + } + } + + data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) { + if(ptr) + { + auto free_cstring = [](const char*& val) { + if(val && ::strnlen(val, 1) > 0) ::free(const_cast(val)); + val = ""; + }; + + delete[] ptr->mem_banks; + delete[] ptr->caches; + delete[] ptr->io_links; + free_cstring(ptr->name); + free_cstring(ptr->vendor_name); + free_cstring(ptr->product_name); + free_cstring(ptr->model_name); + } + delete ptr; + }); + } + return data; +} + +auto& +get_agent_topology() +{ + static auto _v = read_topology(); + return _v; +} +} // namespace +} // namespace agent +} // namespace rocprofiler + extern "C" { rocprofiler_status_t rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback, size_t agent_size, void* user_data) { - using pc_sampling_config_vec_t = std::vector; + if(agent_size > sizeof(rocprofiler_agent_t)) + { + LOG(ERROR) << "rocprofiler_agent_t used by caller is ABI-incompatible with " + "rocprofiler_agent_t in rocprofiler"; + return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI; + } - auto pc_sampling_configs = std::vector{}; - auto get_agents = [&pc_sampling_configs]() { - static const auto _default_pc_config = - rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP, - ROCPROFILER_PC_SAMPLING_UNIT_TIME, - 1UL, - 1000000000UL, - 0}; - auto temporaries_ = std::vector{}; - const auto& agent_info = rocprofiler::hsa::all_agents(); - for(const auto& agent : agent_info) - { - auto& _data = pc_sampling_configs.emplace_back(); - if(agent.isGpu()) _data = {_default_pc_config}; - temporaries_.emplace_back(rocprofiler_agent_t{ - .id = rocprofiler_agent_id_t{.handle = temporaries_.size()}, - .type = (agent.isCpu() ? ROCPROFILER_AGENT_TYPE_CPU - : (agent.isGpu() ? 
ROCPROFILER_AGENT_TYPE_GPU - : ROCPROFILER_AGENT_TYPE_NONE)), - .name = agent.getNameChar(), - .pc_sampling_configs = - rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()}}); - } - return temporaries_; - }; - - auto agents = get_agents(); - auto pointers = std::vector{}; + // auto agents = get_agents(); + auto& agents = rocprofiler::agent::get_agent_topology(); + auto pointers = std::vector{}; pointers.reserve(agents.size()); for(auto& agent : agents) { - pointers.emplace_back(&agent); + pointers.emplace_back(agent.get()); } - assert(agent_size <= sizeof(rocprofiler_agent_t) && - "rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in " - "rocprofiler"); return callback(pointers.data(), pointers.size(), user_data); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.cpp index 29ab5f390f..2e6caac1d7 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.cpp @@ -104,7 +104,7 @@ findViaEnvironment(const std::string& filename) } // namespace MetricMap -getDerrivedHardwareMetrics() +getDerivedHardwareMetrics() { return loadXml(findViaEnvironment("derived_counters.xml")); } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.hpp index c4385c7c79..641f2bc3b7 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/metrics.hpp @@ -48,7 +48,7 @@ MetricMap getBaseHardwareMetrics(); MetricMap -getDerrivedHardwareMetrics(); +getDerivedHardwareMetrics(); } // namespace counters @@ -98,4 +98,4 @@ struct formatter return fmt::format_to(ctx.out(), "{}", out); } }; -} // namespace fmt \ No newline at end of file +} // namespace fmt diff --git 
a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/tests/metrics_test.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/tests/metrics_test.cpp index e46ee27694..fa6b2f317e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/tests/metrics_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/counters/tests/metrics_test.cpp @@ -7,13 +7,13 @@ namespace { auto -loadTestData(std::unordered_map>> map) +loadTestData(const std::unordered_map>>& map) { std::unordered_map> ret; - for(auto& [gfx, dataMap] : map) + for(const auto& [gfx, dataMap] : map) { auto& metric_vec = ret.emplace(gfx, std::vector{}).first->second; - for(auto& data_vec : dataMap) + for(const auto& data_vec : dataMap) { metric_vec.emplace_back( data_vec.at(0), data_vec.at(1), data_vec.at(2), data_vec.at(4), data_vec.at(3)); @@ -23,7 +23,7 @@ loadTestData(std::unordered_map -#include -#include - -#include "lib/common/utility.hpp" - -namespace fs = std::filesystem; - -namespace rocprofiler -{ -namespace hsa -{ -namespace -{ -std::unordered_map -get_gpu_nodes_near_cpu() -{ - std::unordered_map gpu_numa_nodes_near_cpu; - long long gpu_numa_nodes_start = 0; - - std::string path = "/sys/class/kfd/kfd/topology/nodes"; - for(const auto& entry : fs::directory_iterator(path)) - { - long long node_id = std::stoll(entry.path().filename().c_str()); - std::ifstream gpu_id_file; - std::string gpu_path = entry.path().c_str(); - gpu_path += "/gpu_id"; - gpu_id_file.open(gpu_path); - std::string gpu_id_str; - if(gpu_id_file.is_open()) - { - gpu_id_file >> gpu_id_str; - - if(!gpu_id_str.empty()) - { - auto gpu_id = std::stoll(gpu_id_str); - if(gpu_id > 0 && (gpu_numa_nodes_start > node_id || gpu_numa_nodes_start == 0)) - { - gpu_numa_nodes_start = node_id; - } - } - } - gpu_id_file.close(); - } - - path = "/sys/class/kfd/kfd/topology/nodes"; - for(const auto& entry : fs::directory_iterator(path)) - { - long long node_id = std::stoll(entry.path().filename().c_str()); - 
std::string numa_node_path = entry.path().c_str(); - long long agent_id = std::stoll(entry.path().filename().c_str()); - if(agent_id >= gpu_numa_nodes_start) - { - numa_node_path += "/io_links"; - for(const auto& numa_node_entry : fs::directory_iterator(numa_node_path)) - { - std::string numa_node_entry_properties_path = numa_node_entry.path().c_str(); - numa_node_entry_properties_path += "/properties"; - std::ifstream gpu_properties_file; - gpu_properties_file.open(numa_node_entry_properties_path); - std::string gpu_properties_file_line; - if(gpu_properties_file.is_open()) - { - while(gpu_properties_file) - { - std::getline(gpu_properties_file, gpu_properties_file_line); - std::string delimiter = " "; - std::stringstream ss(gpu_properties_file_line); - std::string word; - ss >> word; - if(word == "node_to") - { - ss >> word; - long long near_cpu_node_id = std::stoll(word); - if(near_cpu_node_id < gpu_numa_nodes_start) - { - gpu_numa_nodes_near_cpu[node_id] = near_cpu_node_id; - } - } - } - } - gpu_properties_file.close(); - } - } - } - return gpu_numa_nodes_near_cpu; -} - -// This function checks to see if the provided -// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true, -// the function adds an additional requirement that the pool have the -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false, -// pools must NOT have this property. -// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is -// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but -// no pool was found meeting the requirements. If an error is encountered, we -// return that error. 
-hsa_status_t -FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) -{ - if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT; - - auto [api_ptr, pool_ptr] = - *static_cast*>(data); - hsa_amd_segment_t segment; - LOG_IF(FATAL, - api_ptr->hsa_amd_memory_pool_get_info_fn( - pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) == HSA_STATUS_ERROR) - << "Could not get pool segment"; - if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS; - - uint32_t flag; - LOG_IF(FATAL, - api_ptr->hsa_amd_memory_pool_get_info_fn( - pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) == HSA_STATUS_ERROR) - << "Could not get flag value"; - uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) - { - return HSA_STATUS_SUCCESS; - } - *(pool_ptr) = pool; - return HSA_STATUS_INFO_BREAK; -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t -FindStandardPool(hsa_amd_memory_pool_t pool, void* data) -{ - return FindGlobalPool(pool, data, false); -} - -// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that -// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS -// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT -hsa_status_t -FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) -{ - return FindGlobalPool(pool, data, true); -} - -void -init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent) -{ - CHECK(!cpu_agent.isGpu()); - auto params = std::make_pair(&api, &cpu_agent.cpu_pool); - - auto status = - api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, ¶ms); - LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) - << "Error: Command Buffer Pool is not initialized"; - - params.second = &cpu_agent.kernarg_pool; - status = 
- api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params)); - LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) - << "Error: Output Buffer Pool is not initialized"; -} - -void -init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info) -{ - CHECK(agent_info.isGpu()); - auto params = std::make_pair(&api, &agent_info.gpu_pool); - auto status = - api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, ¶ms); - - LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) - << "Error: GPU Pool is not initialized"; -} - -} // namespace - -const std::vector& -all_agents() -{ - static std::shared_ptr> agents = AgentInfo::getAgents( - {.hsa_iterate_agents_fn = hsa_iterate_agents, .hsa_agent_get_info_fn = hsa_agent_get_info}, - {.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info, - .hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools, - .hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate, - .hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free, - .hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access}); - return *agents; -} - -std::shared_ptr> -AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api) -{ - std::vector agents; - std::shared_ptr> agent_info_ptr = - std::make_shared>(); - auto& agent_info = *agent_info_ptr; - - api.hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - CHECK_NOTNULL(static_cast*>(data))->emplace_back(agent); - return HSA_STATUS_SUCCESS; - }, - &agents); - - auto near_gpu_map = get_gpu_nodes_near_cpu(); - std::unordered_map cpu_id_to_agent; - - // Reserve is required to prevent reallocation (which breaks cpu_id_to_agent) - agent_info.reserve(agents.size()); - for(auto& agent : agents) - { - auto& new_agent = agent_info.emplace_back(agent, api); - if(!new_agent.isGpu()) - { - uint32_t cpu_numa_node_id; - LOG_IF(FATAL, - api.hsa_agent_get_info_fn(agent, 
HSA_AGENT_INFO_NODE, &cpu_numa_node_id) != - HSA_STATUS_SUCCESS) - << "Could not fetch numa info"; - new_agent.setNumaNode(cpu_numa_node_id); - cpu_id_to_agent[cpu_numa_node_id] = &new_agent; - init_cpu_pool(ext_api, new_agent); - } - else if(new_agent.isGpu()) - { - uint32_t node_id; - LOG_IF(FATAL, - api.hsa_agent_get_info_fn( - agent, - static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), - &node_id) != HSA_STATUS_SUCCESS) - << "Could not fetch driver node id"; - new_agent.setIndex(node_id); - LOG_IF(FATAL, - api.hsa_agent_get_info_fn(agent, - static_cast(HSA_AGENT_INFO_NODE), - &node_id) != HSA_STATUS_SUCCESS) - << "Could not fetch driver node id"; - new_agent.setNumaNode(node_id); - init_gpu_pool(ext_api, new_agent); - } - } - - // Sperate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the first - // NUMA nodes) - for(auto& agent : agent_info) - { - if(agent.isGpu()) - { - auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode()); - LOG_IF(FATAL, !near_gpu) << fmt::format("No CPU Agent near GPU Agent: {} {}", agent); - - auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu); - LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu); - agent.setNearCpuAgent((*id_to_agent)->getAgent()); - agent.cpu_pool = (*id_to_agent)->cpu_pool; - agent.kernarg_pool = (*id_to_agent)->kernarg_pool; - } - } - return agent_info_ptr; -} - -AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table) -: handle_(agent.handle) -, agent_(agent) -{ - if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS) - { - LOG(FATAL) << "hsa_agent_get_info failed"; - } - - table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_); - - const int gfxip_label_len = std::min(strlen(name_) - 2, sizeof(gfxip_) - 1); - memcpy(gfxip_, name_, gfxip_label_len); - gfxip_[gfxip_label_len] = '\0'; - - if(type_ != HSA_DEVICE_TYPE_GPU) - { - return; - } - - 
table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_); - table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_); - - table.hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_); - - table.hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_); - - table.hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_); - - if(table.hsa_agent_get_info_fn(agent, - (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE, - &shader_arrays_per_se_) != HSA_STATUS_SUCCESS || - table.hsa_agent_get_info_fn(agent, - (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, - &waves_per_cu_) != HSA_STATUS_SUCCESS) - { - LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed"; - } - - compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_); - wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_; - - if(table.hsa_agent_get_info_fn(agent, - (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DOMAIN, - &pci_domain_) != HSA_STATUS_SUCCESS || - table.hsa_agent_get_info_fn(agent, - (hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID, - &pci_location_id_) != HSA_STATUS_SUCCESS) - { - LOG(FATAL) << "hsa_agent_get_info for PCI info failed"; - } -} - -uint64_t -AgentInfo::getIndex() const -{ - return index_; -} - -hsa_device_type_t -AgentInfo::getType() const -{ - return type_; -} - -uint64_t -AgentInfo::getHandle() const -{ - return handle_; -} - -const std::string_view -AgentInfo::getName() const -{ - return name_; -} - -std::string -AgentInfo::getGfxip() const -{ - return std::string(gfxip_); -} - -uint32_t -AgentInfo::getMaxWaveSize() const -{ - return max_wave_size_; -} - -uint32_t -AgentInfo::getMaxQueueSize() const -{ - return max_queue_size_; -} - -uint32_t -AgentInfo::getCUCount() const -{ - return cu_num_; -} - -uint32_t -AgentInfo::getSimdCountPerCU() const -{ - return simds_per_cu_; -} - 
-uint32_t -AgentInfo::getShaderEngineCount() const -{ - return se_num_; -} - -uint32_t -AgentInfo::getShaderArraysPerSE() const -{ - return shader_arrays_per_se_; -} - -uint32_t -AgentInfo::getMaxWavesPerCU() const -{ - return waves_per_cu_; -} - -uint32_t -AgentInfo::getCUCountPerSH() const -{ - return compute_units_per_sh_; -} - -uint32_t -AgentInfo::getWaveSlotsPerSimd() const -{ - return wave_slots_per_simd_; -} - -uint32_t -AgentInfo::getPCIDomain() const -{ - return pci_domain_; -} - -uint32_t -AgentInfo::getPCILocationID() const -{ - return pci_location_id_; -} - -uint32_t -AgentInfo::getXccCount() const -{ - return xcc_num_; -} - -void -AgentInfo::setIndex(uint64_t index) -{ - index_ = index; -} - -void -AgentInfo::setType(hsa_device_type_t type) -{ - type_ = type; -} - -void -AgentInfo::setHandle(uint64_t handle) -{ - handle_ = handle; -} - -void -AgentInfo::setName(const std::string& name) -{ - constexpr auto name_len = sizeof(name_) / sizeof(char); - // - // char* strncpy(char* destination, const char* source, size_t num) - // - // If the end of the source string (which is signaled by a null-character) is found before num - // characters have been copied, destination is padded with zeros until a total of num characters - // have been written to it - strncpy(name_, name.c_str(), name_len - 2); - // ensure always terminated - name_[name_len - 1] = '\0'; -} - -void -AgentInfo::setNumaNode(uint32_t numa_node) -{ - numa_node_ = numa_node; -} - -uint32_t -AgentInfo::getNumaNode() const -{ - return numa_node_; -} - -void -AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent) -{ - near_cpu_agent_ = near_cpu_agent; -} - -hsa_agent_t -AgentInfo::getNearCpuAgent() -{ - return near_cpu_agent_; -} -} // namespace hsa -} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent.hpp deleted file mode 100644 index ccecc9e896..0000000000 --- 
a/projects/rocprofiler-sdk/source/lib/rocprofiler/hsa/agent.hpp +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2018-2023 Advanced Micro Devices, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#pragma once - -#include -#include -#include - -#include "fmt/core.h" -#include "fmt/ranges.h" - -#include -#include -#include -#include - -#include "lib/common/utility.hpp" - -namespace rocprofiler -{ -namespace hsa -{ -static const uint32_t LDS_BLOCK_SIZE = 128 * 4; - -class AgentInfo -{ -public: - AgentInfo() = default; - AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table); - uint64_t getIndex() const; - hsa_device_type_t getType() const; - bool isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; } - bool isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; } - uint64_t getHandle() const; - const std::string_view getName() const; - const char* getNameChar() const { return name_; } - std::string getGfxip() const; - uint32_t getMaxWaveSize() const; - uint32_t getMaxQueueSize() const; - uint32_t getCUCount() const; - uint32_t getSimdCountPerCU() const; - uint32_t getShaderEngineCount() const; - uint32_t getShaderArraysPerSE() const; - uint32_t getMaxWavesPerCU() const; - uint32_t getCUCountPerSH() const; - uint32_t getWaveSlotsPerSimd() const; - uint32_t getPCIDomain() const; - uint32_t getPCILocationID() const; - uint32_t getXccCount() const; - - void setIndex(uint64_t index); - void setType(hsa_device_type_t type); - void setHandle(uint64_t handle); - void setName(const std::string& name); - - void setNumaNode(uint32_t numa_node); - uint32_t getNumaNode() const; - - void setNearCpuAgent(hsa_agent_t near_cpu_agent); - hsa_agent_t getNearCpuAgent(); - hsa_agent_t getAgent() const { return agent_; } - - hsa_amd_memory_pool_t cpu_pool; - hsa_amd_memory_pool_t kernarg_pool; - hsa_amd_memory_pool_t gpu_pool; - - static std::shared_ptr> getAgents(const CoreApiTable&, - const AmdExtTable&); - - // Keep move constuctors (i.e. 
std::move()) - AgentInfo(AgentInfo&& other) noexcept = default; - AgentInfo& operator=(AgentInfo&& other) noexcept = default; - - // Do not allow copying this class - AgentInfo(const AgentInfo&) = delete; - AgentInfo& operator=(const AgentInfo&) = delete; - -private: - uint64_t index_ = 0; - hsa_device_type_t type_ = HSA_DEVICE_TYPE_CPU; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 - uint64_t handle_ = 0; - char name_[64] = {'\0'}; - char gfxip_[64] = {'\0'}; - uint32_t max_wave_size_ = 0; - uint32_t max_queue_size_ = 0; - uint32_t cu_num_ = 0; - uint32_t simds_per_cu_ = 0; - uint32_t se_num_ = 0; - uint32_t shader_arrays_per_se_ = 0; - uint32_t waves_per_cu_ = 0; - // CUs per SH/SA - uint32_t compute_units_per_sh_ = 0; - uint32_t wave_slots_per_simd_ = 0; - // Number of XCCs on the GPU - uint32_t xcc_num_ = 0; - - uint32_t pci_domain_ = 0; - uint32_t pci_location_id_ = 0; - - uint32_t numa_node_ = 0; - hsa_agent_t near_cpu_agent_ = {}; - hsa_agent_t agent_ = {}; -}; - -const std::vector& -all_agents(); -} // namespace hsa -} // namespace rocprofiler - -namespace fmt -{ -template <> -struct formatter -{ - template - constexpr auto parse(ParseContext& ctx) - { - return ctx.begin(); - } - - template - auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const - { - auto device_type_name = [](auto dev) -> std::string_view { - switch(dev) - { - case HSA_DEVICE_TYPE_CPU: return "CPU"; - case HSA_DEVICE_TYPE_GPU: return "GPU"; - case HSA_DEVICE_TYPE_DSP: return "DSP"; - } - return "UNKNOWN"; - }; - - return fmt::format_to( - ctx.out(), - R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})", - agent.getIndex(), - device_type_name(agent.getType()), - agent.getHandle(), - agent.getName(), - 
agent.getGfxip(), - agent.getMaxWaveSize(), - agent.getMaxQueueSize(), - agent.getCUCount(), - agent.getSimdCountPerCU(), - agent.getShaderEngineCount(), - agent.getShaderArraysPerSE(), - agent.getMaxWavesPerCU(), - agent.getCUCountPerSH(), - agent.getWaveSlotsPerSimd(), - agent.getPCIDomain(), - agent.getPCILocationID(), - agent.getXccCount()); - } -}; -} // namespace fmt diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp index 0bff1efb67..b6d59c3ae0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.cpp @@ -100,28 +100,6 @@ get_forced_configure() return _v; } -void -init_logging() -{ - static auto _once = std::once_flag{}; - std::call_once(_once, []() { - auto get_argv0 = []() { - auto ifs = std::ifstream{"/proc/self/cmdline"}; - auto sarg = std::string{}; - while(ifs && !ifs.eof()) - { - ifs >> sarg; - if(!sarg.empty()) break; - } - return sarg; - }; - - static auto argv0 = get_argv0(); - google::InitGoogleLogging(argv0.c_str()); - LOG(INFO) << "logging initialized"; - }); -} - std::vector get_link_map() { @@ -397,6 +375,28 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id) } } // namespace +void +init_logging() +{ + static auto _once = std::once_flag{}; + std::call_once(_once, []() { + auto get_argv0 = []() { + auto ifs = std::ifstream{"/proc/self/cmdline"}; + auto sarg = std::string{}; + while(ifs && !ifs.eof()) + { + ifs >> sarg; + if(!sarg.empty()) break; + } + return sarg; + }; + + static auto argv0 = get_argv0(); + google::InitGoogleLogging(argv0.c_str()); + LOG(INFO) << "logging initialized"; + }); +} + uint32_t get_client_offset() { diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.hpp index 3800254ff1..5abfdf8e57 100644 --- 
a/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/registration.hpp @@ -54,6 +54,10 @@ namespace rocprofiler { namespace registration { +// initialize google logging +void +init_logging(); + // initialize the clients void initialize(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/rocprofiler.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/rocprofiler.cpp index ea0b767a28..7bc548ea6f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/rocprofiler.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/rocprofiler.cpp @@ -24,22 +24,6 @@ #include #include "lib/common/utility.hpp" -#include "lib/rocprofiler/context/context.hpp" -#include "lib/rocprofiler/context/domain.hpp" -#include "lib/rocprofiler/hsa/agent.hpp" -#include "lib/rocprofiler/hsa/hsa.hpp" -#include "lib/rocprofiler/registration.hpp" - -#include -#include - -namespace -{ -template -auto -consume_args(Tp&&...) -{} -} // namespace extern "C" { rocprofiler_status_t diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/CMakeLists.txt index cb4e90de3a..5bf9263edb 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/CMakeLists.txt @@ -11,10 +11,10 @@ include(GoogleTest) # # -------------------------------------------------------------------------------------- # -set(rocprofiler_lib_sources buffer.cpp) +set(rocprofiler_lib_sources agent.cpp buffer.cpp timestamp.cpp version.cpp) add_executable(rocprofiler-lib-tests) -target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources}) +target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources} details/agent.cpp) target_link_libraries( rocprofiler-lib-tests PRIVATE rocprofiler::rocprofiler-static-library diff --git 
a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp
new file mode 100644
index 0000000000..71ff2c2c93
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/agent.cpp
@@ -0,0 +1,217 @@
+// MIT License
+//
+// Copyright (c) 2023 ROCm Developer Tools
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include
+#include
+#include
+
+#include "lib/rocprofiler/registration.hpp"
+#include "lib/rocprofiler/tests/details/agent.hpp"
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+TEST(rocprofiler_lib, agent_abi)
+{
+    constexpr auto msg = "ABI break. NEW FIELDS MAY ONLY BE ADDED AT END OF STRUCT";
+
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, size), 0) << msg;  // each check pins one field's byte offset
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, id), 8) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, type), 16) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_cores_count), 20) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_count), 24) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks_count), 28) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, caches_count), 32) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links_count), 36) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_core_id_base), 40) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_id_base), 44) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_simd), 48) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, lds_size_in_kb), 52) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, gds_size_in_kb), 56) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_gws), 60) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, wave_front_size), 64) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_xcc), 68) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_count), 72) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, array_count), 76) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_shader_banks), 80) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_arrays_per_engine), 84) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_simd_array), 88) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_per_cu), 92) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_slots_scratch_cu), 96) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, gfx_target_version), 100) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_id), 104) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, device_id), 106) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, location_id), 108) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, domain), 112) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, drm_render_minor), 116) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_engines), 120) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_xgmi_engines), 124) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_queues_per_engine), 128) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_cp_queues), 132) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_ccompute), 136) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_fcompute), 140) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, sdma_fw_version), 144) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, fw_version), 148) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, capability), 152) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_engine), 156) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_cu), 160) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, family_id), 164) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_size), 168) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_size), 172) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, local_mem_size), 176) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, hive_id), 184) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, gpu_id), 192) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_dim), 200) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_dim), 212) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks), 224) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, caches), 232) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links), 240) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, name), 248) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
+    EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << msg;
+    // Add test for offset of new field above this. Do NOT change any existing values!
+
+    // If a new field is added, increase this value by the size of the new field(s)
+    EXPECT_EQ(sizeof(rocprofiler_agent_t), 296)
+        << "ABI break. If you added a new field, make sure that this is the only new check that "
+           "failed. Please add a check for the new field at the offset and update this test to the "
+           "new size";
+}
+
+TEST(rocprofiler_lib, agent)
+{
+    rocprofiler::registration::init_logging();
+
+    auto info_ret = std::system("/usr/bin/rocminfo");  // dump rocminfo output into the test log
+    EXPECT_EQ(info_ret, 0);
+
+    auto sys_ret = std::system(
+        "/bin/bash -c 'for i in $(find /sys/class/kfd/kfd/topology/nodes -maxdepth 2 -type f | "
+        "grep properties | sort); do echo -e \"\n##### ${i} #####\n\"; cat ${i}; echo \"\"; done'");
+    EXPECT_EQ(sys_ret, 0);
+
+    auto agents = std::vector<const rocprofiler_agent_t*>{};  // filled by iterate_cb via user_data
+    rocprofiler_available_agents_cb_t iterate_cb =
+        [](const rocprofiler_agent_t** agents_arr, size_t num_agents, void* user_data) {
+            auto* agents_v = static_cast<std::vector<const rocprofiler_agent_t*>*>(user_data);
+            // EXPECT_EQ(num_agents, hsa_agents_v.size());
+            for(size_t i = 0; i < num_agents; ++i)
+            {
+                const auto* agent = agents_arr[i];
+                agents_v->emplace_back(agent);
+            }
+            return ROCPROFILER_STATUS_SUCCESS;
+        };
+
+    auto status =
+        rocprofiler_query_available_agents(iterate_cb,
+                                           sizeof(rocprofiler_agent_t),
+                                           const_cast<void*>(static_cast<const void*>(&agents)));
+
+    EXPECT_EQ(status, ROCPROFILER_STATUS_SUCCESS);
+
+    auto _rocm_info = rocprofiler::test::rocm_info{};  // reference data queried through the HSA runtime
+    EXPECT_EQ(rocprofiler::test::get_info(_rocm_info), 0);
+
+    auto& hsa_agents_v = _rocm_info.agents;
+
+    ASSERT_EQ(agents.size(), hsa_agents_v.size());
+    for(size_t i = 0; i < agents.size(); ++i)  // agents are compared pairwise, by index
+    {
+        const auto* agent = agents.at(i);
+
+        auto msg = fmt::format("name={}, model={}, gfx version={}, id={}, type={}",
+                               agent->name,
+                               agent->model_name,
+                               agent->gfx_target_version,
+                               agent->id.handle,
+                               agent->type == ROCPROFILER_AGENT_TYPE_CPU ? "CPU" : "GPU");
+
+        // std::cout << msg << std::endl;
+        EXPECT_LT(i, hsa_agents_v.size()) << msg;
+        if(i >= hsa_agents_v.size()) continue;
+
+        auto* hsa_agent = &hsa_agents_v.at(i);
+
+        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU)
+        {
+            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_CPU) << msg;
+        }
+        else if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
+        {
+            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_GPU) << msg;
+        }
+        else
+        {
+            EXPECT_TRUE(false) << msg << " :: agent-type != CPU|GPU :: " << agent->type;
+        }
+
+        EXPECT_EQ(std::string_view{agent->name}, std::string_view{hsa_agent->name}) << msg;
+        EXPECT_EQ(std::string_view{agent->vendor_name}, std::string_view{hsa_agent->vendor_name})
+            << msg;
+        EXPECT_EQ(std::string_view{agent->product_name},
+                  std::string_view{hsa_agent->device_mkt_name})
+            << msg;
+        EXPECT_EQ(agent->simd_count, hsa_agent->compute_unit * hsa_agent->simds_per_cu) << msg;
+        EXPECT_EQ(agent->cu_count, hsa_agent->compute_unit) << msg;
+        EXPECT_EQ(agent->simd_per_cu, hsa_agent->simds_per_cu) << msg;
+        EXPECT_EQ(agent->wave_front_size, hsa_agent->wavefront_size) << msg;
+        EXPECT_EQ(agent->simd_arrays_per_engine, hsa_agent->shader_arrs_per_sh_eng) << msg;
+        EXPECT_EQ(agent->max_waves_per_cu, hsa_agent->max_waves_per_cu) << msg;
+        EXPECT_EQ(agent->num_shader_banks, hsa_agent->shader_engs) << msg;
+        EXPECT_EQ(agent->workgroup_max_size, hsa_agent->workgroup_max_size) << msg;
+        EXPECT_EQ(agent->workgroup_max_dim.x, hsa_agent->workgroup_max_dim[0]) << msg;
+        EXPECT_EQ(agent->workgroup_max_dim.y, hsa_agent->workgroup_max_dim[1]) << msg;
+        EXPECT_EQ(agent->workgroup_max_dim.z, hsa_agent->workgroup_max_dim[2]) << msg;
+        EXPECT_EQ(agent->grid_max_size, hsa_agent->grid_max_size) << msg;
+        EXPECT_EQ(agent->grid_max_dim.x, hsa_agent->grid_max_dim.x) << msg;
+        EXPECT_EQ(agent->grid_max_dim.y, hsa_agent->grid_max_dim.y) << msg;
+        EXPECT_EQ(agent->grid_max_dim.z, hsa_agent->grid_max_dim.z) << msg;
+        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
+        {
+            // HSA lib doesn't set family ID for CPU-only but we do
+            EXPECT_EQ(agent->family_id, hsa_agent->family_id) << msg;
+        }
+        EXPECT_EQ(agent->fw_version.ui32.uCode, hsa_agent->ucode_version) << msg;
+        EXPECT_EQ(agent->sdma_fw_version.uCodeSDMA, hsa_agent->sdma_ucode_version) << msg;
+
+        if(hsa_agent->shader_engs > 0)  // guard the division below
+        {
+            EXPECT_EQ(agent->cu_per_engine, hsa_agent->compute_unit / hsa_agent->shader_engs)
+                << msg;
+        }
+    }
+
+    // clean up memory leak
+    for(auto& itr : _rocm_info.isas)
+        delete[] itr.name_str;
+}
diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.cpp
new file mode 100644
index 0000000000..fb1cf578c7
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.cpp
@@ -0,0 +1,478 @@
+// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "agent.hpp"
+
+#include "lib/common/utility.hpp"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define RET_IF_HSA_INIT_ERR(err)                                                                   \
+    {                                                                                              \
+        if(const auto hsa_init_err_ = (err); hsa_init_err_ != HSA_STATUS_SUCCESS) /* eval once */  \
+        {                                                                                          \
+            CheckInitError();                                                                      \
+            RET_IF_HSA_ERR(hsa_init_err_);                                                         \
+        }                                                                                          \
+    }
+
+#define RET_IF_HSA_ERR(err)                                                                        \
+    {                                                                                              \
+        if(const auto hsa_err_ = (err); hsa_err_ != HSA_STATUS_SUCCESS) /* err may be a call */    \
+        {                                                                                          \
+            char  err_val[12];                                                                     \
+            char* err_str = nullptr;                                                               \
+            if(hsa_status_string(hsa_err_, (const char**) &err_str) != HSA_STATUS_SUCCESS)         \
+            {                                                                                      \
+                sprintf(&(err_val[0]), "%#x", (uint32_t) hsa_err_);                                \
+                err_str = &(err_val[0]);                                                           \
+            }                                                                                      \
+            printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__);                        \
+            printf("Call returned %s\n", err_str);                                                 \
+            return hsa_err_;                                                                       \
+        }                                                                                          \
+    }
+
+// namespace fs = std::filesystem;
+
+namespace rocprofiler
+{
+namespace test
+{
+namespace
+{
+// Acquire system information
+hsa_status_t
+AcquireSystemInfo(system_info_t* sys_info)
+{
+    hsa_status_t err;
+
+    // Get Major and Minor version of runtime
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &sys_info->major);
+    RET_IF_HSA_ERR(err);
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor);
+    RET_IF_HSA_ERR(err);
+
+    // Get timestamp frequency
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_info->timestamp_frequency);
+    RET_IF_HSA_ERR(err);
+
+    // Get maximum duration of a signal wait operation
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &sys_info->max_wait);
+    RET_IF_HSA_ERR(err);
+
+    // Get Endianness of the system
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &sys_info->endianness);
+    RET_IF_HSA_ERR(err);
+
+    // Get machine model info
+    err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &sys_info->machine_model);
+    RET_IF_HSA_ERR(err);
+    return err;
+}
+
+hsa_status_t
+AcquireAgentInfoEntry(hsa_agent_t agent, agent_info_t* agent_i)
+{
+    hsa_status_t err;
+    // Get agent name and vendor
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name);
+    RET_IF_HSA_ERR(err);
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &agent_i->vendor_name);
+    RET_IF_HSA_ERR(err);
+
+    // Get device marketing name
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_PRODUCT_NAME, &agent_i->device_mkt_name);
+    RET_IF_HSA_ERR(err);
+
+    // Get agent feature
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_i->agent_feature);
+    RET_IF_HSA_ERR(err);
+
+    // Get profile supported by the agent
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_i->agent_profile);
+    RET_IF_HSA_ERR(err);
+
+    // Get floating-point rounding mode
+    err = hsa_agent_get_info(
+        agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &agent_i->float_rounding_mode);
+    RET_IF_HSA_ERR(err);
+
+    // Get max number of queue
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &agent_i->max_queue);
+    RET_IF_HSA_ERR(err);
+
+    // Get queue min size
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &agent_i->queue_min_size);
+    RET_IF_HSA_ERR(err);
+
+    // Get queue max size
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_i->queue_max_size);
+    RET_IF_HSA_ERR(err);
+
+    // Get queue type
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &agent_i->queue_type);
+    RET_IF_HSA_ERR(err);
+
+    // Get agent node
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node);
+    RET_IF_HSA_ERR(err);
+
+    // Get device type
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_i->device_type);
+    RET_IF_HSA_ERR(err);
+
+    if(HSA_DEVICE_TYPE_GPU == agent_i->device_type)
+    {
+        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_i->agent_isa);
+        RET_IF_HSA_ERR(err);
+    }
+
+    // Get cache size
+    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, agent_i->cache_size);
+    RET_IF_HSA_ERR(err);
+
+    // Get chip id
+    err =
+        hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID, &agent_i->chip_id);
+    RET_IF_HSA_ERR(err);
+
+    // Get cacheline size
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE, &agent_i->cacheline_size);
+    RET_IF_HSA_ERR(err);
+
+    // Get Max clock frequency
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &agent_i->max_clock_freq);
+    RET_IF_HSA_ERR(err);
+
+    // Internal Driver node ID
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, &agent_i->internal_node_id);
+    RET_IF_HSA_ERR(err);
+
+    // Max number of watch points on mem. addr. ranges to generate exception
+    // events
+    err = hsa_agent_get_info(agent,
+                             (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS,
+                             &agent_i->max_addr_watch_pts);
+    RET_IF_HSA_ERR(err);
+
+    // Get Agent BDFID
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_BDFID, &agent_i->bdf_id);
+    RET_IF_HSA_ERR(err);
+
+    // Get Max Memory Clock
+    // Not supported by hsa_agent_get_info
+    // err = hsa_agent_get_info(agent,
+    // (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY,
+    // &agent_i->mem_max_freq);
+    // RET_IF_HSA_ERR(err);
+
+    // Get Num SIMDs per CU
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &agent_i->simds_per_cu);
+    RET_IF_HSA_ERR(err);
+
+    // Get Num Shader Engines
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &agent_i->shader_engs);
+    RET_IF_HSA_ERR(err);
+
+    // Get Num Shader Arrays per Shader engine
+    err = hsa_agent_get_info(agent,
+                             (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE,
+                             &agent_i->shader_arrs_per_sh_eng);
+    RET_IF_HSA_ERR(err);
+
+    // Get number of Compute Unit
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &agent_i->compute_unit);
+    RET_IF_HSA_ERR(err);
+
+    // family id
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID, &agent_i->family_id);
+    RET_IF_HSA_ERR(err);
+
+    // ucode version
+    err = hsa_agent_get_info(
+        agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_UCODE_VERSION, &agent_i->ucode_version);
+    RET_IF_HSA_ERR(err);
+
+    // sdma ucode version
+    err = hsa_agent_get_info(agent,
+                             (hsa_agent_info_t) HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION,
+                             &agent_i->sdma_ucode_version);
+    RET_IF_HSA_ERR(err);
+
+    // Check if the agent is kernel agent
+    if((agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) != 0)
+    {
+        // Get flag of fast_f16 operation
+        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &agent_i->fast_f16);
+        RET_IF_HSA_ERR(err);
+
+        // Get wavefront size
+        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_i->wavefront_size);
+        RET_IF_HSA_ERR(err);
+
+        // Get max total number of work-items in a workgroup
+        err = hsa_agent_get_info(
+            agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &agent_i->workgroup_max_size);
+        RET_IF_HSA_ERR(err);
+
+        // Get max number of work-items of each dimension of a work-group
+        err = hsa_agent_get_info(
+            agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &agent_i->workgroup_max_dim);
+        RET_IF_HSA_ERR(err);
+
+        // Get max number of a grid per dimension
+        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &agent_i->grid_max_dim);
+        RET_IF_HSA_ERR(err);
+
+        // Get max total number of work-items in a grid
+        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &agent_i->grid_max_size);
+        RET_IF_HSA_ERR(err);
+
+        // Get max number of fbarriers per work group
+        err = hsa_agent_get_info(
+            agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &agent_i->fbarrier_max_size);
+        RET_IF_HSA_ERR(err);
+
+        err = hsa_agent_get_info(agent,
+                                 (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
+                                 &agent_i->max_waves_per_cu);
+        RET_IF_HSA_ERR(err);
+    }
+    return err;
+}
+
+hsa_status_t
+AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t* pool_i)
+{
+    hsa_status_t err;
+
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment);
+    RET_IF_HSA_ERR(err);
+
+    // Get the size of the POOL
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->pool_size);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &pool_i->alloc_allowed);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &pool_i->alloc_granule);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &pool_i->pool_alloc_alignment);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_amd_memory_pool_get_info(
+        pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pool_i->pl_access);
+    RET_IF_HSA_ERR(err);
+
+    return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t
+get_pool_info(hsa_amd_memory_pool_t pool, void* data)
+{
+    auto* info   = static_cast<rocm_info*>(data);  // data is the rocm_info passed by get_info()
+    auto& pool_i = info->pools.emplace_back();
+    auto  err    = AcquirePoolInfo(pool, &pool_i);
+    RET_IF_HSA_ERR(err);
+
+    return err;
+}
+
+hsa_status_t
+AcquireISAInfo(hsa_isa_t isa, isa_info_t* isa_i)
+{
+    hsa_status_t err;
+    uint32_t     name_len;
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len);
+    RET_IF_HSA_ERR(err);
+
+    isa_i->name_str = new char[name_len];  // released with delete[] by the caller (see tests/agent.cpp)
+    if(isa_i->name_str == nullptr)
+    {
+        return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
+    }
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS, isa_i->mach_models);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(
+        isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->def_rounding_modes);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(
+        isa, HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->base_rounding_modes);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION, &isa_i->fast_f16);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM, &isa_i->workgroup_max_dim);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE, &isa_i->workgroup_max_size);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM, &isa_i->grid_max_dim);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE, &isa_i->grid_max_size);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE, &isa_i->fbarrier_max_size);
+    RET_IF_HSA_ERR(err);
+
+    return err;
+}
+
+hsa_status_t
+get_isa_info(hsa_isa_t isa, void* data)
+{
+    auto*       info  = static_cast<rocm_info*>(data);
+    isa_info_t& isa_i = info->isas.emplace_back();
+
+    isa_i.name_str = nullptr;
+    RET_IF_HSA_ERR(AcquireISAInfo(isa, &isa_i));  // safe: the macro evaluates its argument once
+
+    return HSA_STATUS_SUCCESS;
+}
+
+hsa_status_t
+AcquireAgentInfo(hsa_agent_t agent, void* data)
+{
+    auto*         info    = static_cast<rocm_info*>(data);
+    agent_info_t& agent_i = info->agents.emplace_back();
+
+    RET_IF_HSA_ERR(AcquireAgentInfoEntry(agent, &agent_i));
+    RET_IF_HSA_ERR(hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, data));
+
+    {
+        auto err = hsa_agent_iterate_isas(agent, get_isa_info, data);
+        if(err != HSA_STATUS_ERROR_INVALID_AGENT) RET_IF_HSA_ERR(err);  // INVALID_AGENT is tolerated
+    }
+
+    return HSA_STATUS_SUCCESS;
+}
+
+void
+CheckInitError()
+{
+    printf("ROCm initialization failed\n");
+
+    // Check kernel module for ROCk is loaded
+    FILE* fd = popen("lsmod | grep amdgpu", "r");  // NOTE(review): the stream is never pclose()d
+    char  buf[16];
+    if(fd == nullptr || fread(buf, 1, sizeof(buf), fd) == 0)  // popen failure or empty output
+    {
+        printf("ROCk module is NOT loaded, possibly no GPU devices\n");
+        return;
+    }
+
+    // Check if user belongs to group "video"
+    // @note: User who are not members of "video"
+    // group cannot access DRM services
+    int           status    = -1;
+    bool          member    = false;
+    char          gr_name[] = "video";
+    struct group* grp       = nullptr;  // NOTE(review): loop finds the group entry, not the user's membership
+    do
+    {
+        grp = getgrent();
+        if(grp == nullptr)
+        {
+            break;
+        }
+        status = strcmp(gr_name, grp->gr_name);  // strcmp never reads past a shorter group name
+        if(status == 0)
+        {
+            member = true;
+            break;
+        }
+    } while(grp != nullptr);
+
+    if(member == false)
+    {
+        printf("User is not member of \"video\" group\n");
+        return;
+    }
+}
+}  // namespace
+
+// Collect all static information known to HSA about the target system.
+// Throughout this program, the Acquire-type functions make HSA calls to
+// iterate through HSA objects and then perform HSA get_info calls to
+// accumulate information about those objects. Unlike rocminfo, this
+// modified copy has no Display* functions: the accumulated data is only
+// stored in `info` and nothing is printed.
+int
+get_info(rocm_info& info)
+{
+    RET_IF_HSA_INIT_ERR(hsa_init());
+
+    // This function will call HSA get_info functions to gather information
+    // about the system.
+    RET_IF_HSA_ERR(AcquireSystemInfo(&info.system));
+
+    RET_IF_HSA_ERR(hsa_iterate_agents(AcquireAgentInfo, &info));
+
+    RET_IF_HSA_ERR(hsa_shut_down());
+
+    return HSA_STATUS_SUCCESS;
+}
+
+#undef RET_IF_HSA_ERR
+}  // namespace test
+}  // namespace rocprofiler
diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.hpp
new file mode 100644
index 0000000000..31297503da
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/details/agent.hpp
@@ -0,0 +1,149 @@
+// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "fmt/core.h"
+#include "fmt/ranges.h"
+
+#include
+#include
+#include
+#include
+
+#include "lib/common/utility.hpp"
+
+namespace rocprofiler
+{
+namespace test
+{
+// This structure holds system information acquired through hsa info related
+// calls (filled by AcquireSystemInfo in details/agent.cpp).
+struct system_info_t
+{
+    uint16_t            major               = 0;   // HSA_SYSTEM_INFO_VERSION_MAJOR
+    uint16_t            minor               = 0;   // HSA_SYSTEM_INFO_VERSION_MINOR
+    uint64_t            timestamp_frequency = 0;   // HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY
+    uint64_t            max_wait            = 0;   // HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT
+    hsa_endianness_t    endianness          = {};  // HSA_SYSTEM_INFO_ENDIANNESS
+    hsa_machine_model_t machine_model       = {};  // HSA_SYSTEM_INFO_MACHINE_MODEL
+};
+
+// This structure holds agent information acquired through hsa info related
+// calls (filled by AcquireAgentInfoEntry in details/agent.cpp).
+struct agent_info_t
+{
+    char                              name[64]            = {'\0'};  // HSA_AGENT_INFO_NAME
+    char                              vendor_name[64]     = {'\0'};  // HSA_AGENT_INFO_VENDOR_NAME
+    char                              device_mkt_name[64] = {'\0'};  // HSA_AMD_AGENT_INFO_PRODUCT_NAME
+    hsa_agent_feature_t               agent_feature       = {};      // HSA_AGENT_INFO_FEATURE
+    hsa_profile_t                     agent_profile       = {};      // HSA_AGENT_INFO_PROFILE
+    hsa_default_float_rounding_mode_t float_rounding_mode = {};
+    uint32_t                          max_queue           = 0;       // HSA_AGENT_INFO_QUEUES_MAX
+    uint32_t                          queue_min_size      = 0;       // HSA_AGENT_INFO_QUEUE_MIN_SIZE
+    uint32_t                          queue_max_size      = 0;       // HSA_AGENT_INFO_QUEUE_MAX_SIZE
+    hsa_queue_type_t                  queue_type          = {};      // HSA_AGENT_INFO_QUEUE_TYPE
+    uint32_t                          node                = 0;       // HSA_AGENT_INFO_NODE
+    hsa_device_type_t                 device_type         = {};      // HSA_AGENT_INFO_DEVICE
+    uint32_t                          cache_size[4]       = {0, 0, 0, 0};  // HSA_AGENT_INFO_CACHE_SIZE
+    uint32_t                          chip_id             = 0;       // HSA_AMD_AGENT_INFO_CHIP_ID
+    uint32_t                          cacheline_size      = 0;       // HSA_AMD_AGENT_INFO_CACHELINE_SIZE
+    uint32_t                          max_clock_freq      = 0;       // HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY
+    uint32_t                          internal_node_id    = 0;       // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID
+    uint32_t                          max_addr_watch_pts  = 0;       // HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS
+    uint32_t                          family_id           = 0;       // HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID
+    uint32_t                          ucode_version       = 0;       // HSA_AMD_AGENT_INFO_UCODE_VERSION
+    uint32_t                          sdma_ucode_version  = 0;       // HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION
+    // HSA_AMD_AGENT_INFO_MEMORY_WIDTH is deprecated, so exclude
+    // uint32_t mem_max_freq; Not supported by get_info
+    uint32_t   compute_unit           = 0;          // HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT
+    uint32_t   wavefront_size         = 0;          // only queried for kernel-dispatch agents
+    uint32_t   workgroup_max_size     = 0;          // only queried for kernel-dispatch agents
+    uint32_t   grid_max_size          = 0;          // only queried for kernel-dispatch agents
+    uint32_t   fbarrier_max_size      = 0;          // only queried for kernel-dispatch agents
+    uint32_t   max_waves_per_cu       = 0;          // only queried for kernel-dispatch agents
+    uint32_t   simds_per_cu           = 0;          // HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU
+    uint32_t   shader_engs            = 0;          // HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES
+    uint32_t   shader_arrs_per_sh_eng = 0;          // HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE
+    hsa_isa_t  agent_isa              = {};         // only queried when device_type is GPU
+    hsa_dim3_t grid_max_dim           = {0, 0, 0};  // only queried for kernel-dispatch agents
+    uint16_t   workgroup_max_dim[3]   = {0, 0, 0};  // only queried for kernel-dispatch agents
+    uint16_t   bdf_id                 = 0;          // HSA_AMD_AGENT_INFO_BDFID
+    bool       fast_f16               = false;      // only queried for kernel-dispatch agents
+};
+
+// This structure holds memory pool information acquired through hsa info
+// related calls (filled by AcquirePoolInfo in details/agent.cpp), one entry
+// per pool visited by hsa_amd_agent_iterate_memory_pools.
+struct pool_info_t
+{
+    uint32_t segment              = 0;      // HSA_AMD_MEMORY_POOL_INFO_SEGMENT
+    size_t   pool_size            = 0;      // HSA_AMD_MEMORY_POOL_INFO_SIZE
+    bool     alloc_allowed        = false;  // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED
+    size_t   alloc_granule        = 0;      // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE
+    size_t   pool_alloc_alignment = 0;      // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT
+    bool     pl_access            = false;  // HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL
+    uint32_t global_flag          = 0;      // HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS
+};
+
+// This structure holds ISA information acquired through hsa info
+// related calls (filled by AcquireISAInfo in details/agent.cpp), one entry
+// per ISA visited by hsa_agent_iterate_isas.
+struct isa_info_t
+{
+    char*      name_str               = nullptr;    // allocated with new[] in AcquireISAInfo; caller frees
+    uint32_t   workgroup_max_size     = 0;          // HSA_ISA_INFO_WORKGROUP_MAX_SIZE
+    hsa_dim3_t grid_max_dim           = {0, 0, 0};  // HSA_ISA_INFO_GRID_MAX_DIM
+    uint64_t   grid_max_size          = 0;          // HSA_ISA_INFO_GRID_MAX_SIZE
+    uint32_t   fbarrier_max_size      = 0;          // HSA_ISA_INFO_FBARRIER_MAX_SIZE
+    uint16_t   workgroup_max_dim[3]   = {0, 0, 0};  // HSA_ISA_INFO_WORKGROUP_MAX_DIM
+    bool       def_rounding_modes[3]  = {false, false, false};
+    bool       base_rounding_modes[3] = {false, false, false};
+    bool       mach_models[2]         = {false, false};  // HSA_ISA_INFO_MACHINE_MODELS
+    bool       profiles[2]            = {false, false};  // HSA_ISA_INFO_PROFILES
+    bool       fast_f16               = false;           // HSA_ISA_INFO_FAST_F16_OPERATION
+};
+
+// This structure is meant to hold cache information acquired through hsa
+// info related calls. NOTE(review): nothing in details/agent.cpp fills it
+// and rocm_info has no member of this type -- confirm it is still needed.
+struct cache_info_t
+{
+    char*    name_str = nullptr;
+    uint8_t  level    = 0;
+    uint32_t size     = 0;
+};
+
+struct rocm_info
+{
+    system_info_t             system = {};
+    std::vector<agent_info_t> agents = {};  // one entry per agent visited by hsa_iterate_agents
+    std::vector<pool_info_t>  pools  = {};  // pools of all agents, appended in visiting order
+    std::vector<isa_info_t>   isas   = {};  // ISAs of all agents; each name_str must be delete[]d
+};
+
+int
+get_info(rocm_info& info);
+}  // namespace test
+}  // namespace rocprofiler
diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/timestamp.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/timestamp.cpp
new file mode 100644
index 0000000000..c50f394169
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/timestamp.cpp
@@ -0,0 +1,41 @@
+// MIT License
+//
+// Copyright (c) 2023 ROCm Developer Tools
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include
+#include
+
+#include "lib/common/utility.hpp"
+
+#include
+
+TEST(rocprofiler_lib, timestamp)
+{
+    auto beg = rocprofiler::common::timestamp_ns();  // internal clock, read before the API call
+    auto mid = rocprofiler_timestamp_t{};
+    auto ret = rocprofiler_get_timestamp(&mid);      // public API under test
+    auto end = rocprofiler::common::timestamp_ns();  // internal clock, read after the API call
+
+    EXPECT_EQ(ret, ROCPROFILER_STATUS_SUCCESS);
+    EXPECT_GT(beg, 0);
+    EXPECT_GT(mid, beg);  // strict ordering; presumably both readings use the same clock -- TODO confirm
+    EXPECT_GT(end, mid);
+}
diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/version.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/version.cpp
new file mode 100644
index 0000000000..862a937649
--- /dev/null
+++ b/projects/rocprofiler-sdk/source/lib/rocprofiler/tests/version.cpp
@@ -0,0 +1,53 @@
+// MIT License
+//
+// Copyright (c) 2023 ROCm Developer Tools
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include
+#include
+
+#include "lib/common/utility.hpp"
+#include "rocprofiler/version.h"
+
+#include
+
+TEST(rocprofiler_lib, version)
+{
+    auto correct_version = std::tuple<uint32_t, uint32_t, uint32_t>(
+        ROCPROFILER_VERSION_MAJOR, ROCPROFILER_VERSION_MINOR, ROCPROFILER_VERSION_PATCH);
+    auto query_version      = std::tuple<uint32_t, uint32_t, uint32_t>(0, 0, 0);  // assumes uint32_t* API
+    auto query_version_copy = std::tuple<uint32_t, uint32_t, uint32_t>(0, 0, 0);
+
+    auto ret0 = rocprofiler_get_version(&std::get<0>(query_version), nullptr, nullptr);  // major only
+    auto ret1 = rocprofiler_get_version(nullptr, &std::get<1>(query_version), nullptr);  // minor only
+    auto ret2 = rocprofiler_get_version(nullptr, nullptr, &std::get<2>(query_version));  // patch only
+
+    EXPECT_EQ(ret0, ROCPROFILER_STATUS_SUCCESS);
+    EXPECT_EQ(ret1, ROCPROFILER_STATUS_SUCCESS);
+    EXPECT_EQ(ret2, ROCPROFILER_STATUS_SUCCESS);
+    EXPECT_EQ(query_version, correct_version);
+
+    auto reta = rocprofiler_get_version(&std::get<0>(query_version_copy),  // all three at once
+                                        &std::get<1>(query_version_copy),
+                                        &std::get<2>(query_version_copy));
+    EXPECT_EQ(reta, ROCPROFILER_STATUS_SUCCESS);
+    EXPECT_EQ(query_version_copy, correct_version);
+    EXPECT_EQ(query_version_copy, query_version);
+}
diff --git a/projects/rocprofiler-sdk/source/lib/tests/buffering/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/tests/buffering/CMakeLists.txt
index 0d685882d6..28fcb8054b 100644
--- a/projects/rocprofiler-sdk/source/lib/tests/buffering/CMakeLists.txt
+++ b/projects/rocprofiler-sdk/source/lib/tests/buffering/CMakeLists.txt
@@ -20,4 +20,4 @@ gtest_add_tests(
     TEST_LIST buffering-tests_TESTS
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 
-set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
+set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 120 LABELS "unittests")