Agent information w/o using hsa-runtime (#100)
* Agent information w/o using hsa-runtime
- remove lib/rocprofiler/hsa/agent.{hpp,cpp}
- update include/rocprofiler/agent.h
- basically all possible info from /sys/class/kfd/kfd/topology/nodes/*
* Print topology in rocprofiler_lib.agent test
- hack to help diagnose errors
* Update lib/rocprofiler/tests/details/agent.cpp
- use LOG_IF(WARNING, ...) instead of LOG_IF(FATAL, ...)
* Update lib/rocprofiler/tests/agent.cpp
- print rocminfo at beginning of test
- fix comparison of agent handle
- misc other checks
* Update lib/rocprofiler/agent.cpp
- handle unreadable /sys/class/kfd/kfd/topology/nodes/<N>/properties file
* Update lib/tests/buffering/CMakeLists.txt
- increase timeout to 120
- buffering.parallel will timeout when thread sanitizing is enabled
* Update cmake: rocprofiler-drm
- find drm headers and libraries
* Update include/rocprofiler/agent.h
- add family_id field
* Update lib/rocprofiler/agent.cpp
- parse /proc/cpuinfo for name, family, apicid, etc.
- read_topology uses unique pointers to cleanup memory allocations
- implement name and gfxip
* Update lib/rocprofiler/tests/agent.cpp
- improved failure message
- check name/gfxip
- remove check against hsa_agent_t.handle
- this value is dependent on the address of C++ class
* Update lib/rocprofiler/tests/details/agent.cpp
- tweak gfxip_ variable which is broken for CPU
* Update lib/rocprofiler/agent.cpp
- update string handling for name and gfxip
* Update lib/rocprofiler/tests/agent.cpp
- minor output tweak
* Update lib/rocprofiler/registration.{hpp,cpp}
- registration::init_logging() function
* Update lib/rocprofiler/agent.cpp
- fix hex handling of GFX step version
* Update lib/rocprofiler/tests/details/agent.cpp
- fix format string when nearest CPUs not found
* Update lib/rocprofiler/tests/CMakeLists.txt
- exclude details/agent.cpp from being parsed for gtest tests
* Update include/rocprofiler/fwd.h
- add ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI status
* Update lib/rocprofiler/tests/details/agent.{hpp,cpp}
- replace with slightly modified implementation of rocminfo
- primary change was not printing
* Update lib/rocprofiler/tests/agent.cpp
- update test to use rocminfo data
* Update lib/rocprofiler/agent.cpp
- add pc_sampling_configs
- return error on incompatible ABI
* Update counters and counters tests
- rename test names for consistency
- fixed incorrect spelling of derived
* Add lib/rocprofiler/tests/{timestamp,version}.cpp
- add timestamp and version tests for rocprofiler_get_timestamp and rocprofiler_get_version, respectively
* Update lib/rocprofiler/tests/agent
- fix double free of name_str from isa_info_t
* Update include/rocprofiler/agent.h
- comments for rocprofiler_agent_mem_bank_t
- add rocprofiler_dim3_t
- comments for rocprofiler_agent_t
- add new fields to rocprofiler_agent_t
- cu_count
- workgroup_max_size
- workgroup_max_dim
- grid_max_size
- grid_max_dim
- vendor_name
- product_name
- change prototype of rocprofiler_available_agents_cb_t to be const agent**
* Update lib/rocprofiler/agent.cpp
- set size field
- implement:
- product_name
- vendor_name
- workgroup_max_size
- workgroup_max_dim
- grid_max_size
- grid_max_dim
- cu_count
* Update lib/rocprofiler/tests/agent.cpp
- changes for const agent*
* Update samples/pc_sampling
- updates for const agent*
* Update lib/rocprofiler/agent.cpp
- fix ABI compatibility check
- return incompatible if tool agent is larger than our agent
* Update include/rocprofiler/agent.h
- doxygen comments
- make size field of rocprofiler_agent_t uint64_t for consistency
- add gpu_id via /sys/class/kfd/kfd/.../<idx>/gpu_id
- add model_name via /sys/class/kfd/kfd/.../<idx>/name
* Update lib/rocprofiler/agent.cpp
- add read_file function (vector of strings)
- support enum in read_property
- assign model_name and gpu_id fields
- remove unique_id
* Update lib/rocprofiler/tests/details/agent.*
- support family id, ucode_version, sdma_ucode_version
* Update lib/rocprofiler/tests/agent.cpp
- Add rocprofiler_lib.agent_abi test
- Verify family_id, ucode_version, sdma_ucode_version
This commit is contained in:
committed by
GitHub
parent
2d533ad91e
commit
a798a26227
@@ -168,3 +168,39 @@ find_library(
|
||||
PATHS ${rocm_version_DIR} ${ROCM_PATH})
|
||||
|
||||
target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library})
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# drm
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# Locate the directory that contains drm.h. The ROCm locations and /opt/amdgpu are given as
# both HINTS and PATHS, and each is tried with the include/drm and include suffixes.
# REQUIRED turns a failed search into a configuration error.
find_path(
|
||||
drm_INCLUDE_DIR
|
||||
NAMES drm.h
|
||||
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATH_SUFFIXES include/drm include REQUIRED)
|
||||
|
||||
# Locate the directory that contains xf86drm.h using the same search locations as drm.h.
find_path(
|
||||
xf86drm_INCLUDE_DIR
|
||||
NAMES xf86drm.h
|
||||
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATH_SUFFIXES include/drm include REQUIRED)
|
||||
|
||||
# Locate the drm library; configuration fails if it cannot be found.
find_library(
|
||||
drm_LIBRARY
|
||||
NAMES drm
|
||||
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED)
|
||||
|
||||
# Locate the drm_amdgpu library; configuration fails if it cannot be found.
find_library(
|
||||
drm_amdgpu_LIBRARY
|
||||
NAMES drm_amdgpu
|
||||
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
|
||||
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED)
|
||||
|
||||
# Attach the discovered header directories (as SYSTEM includes) and libraries to the
# rocprofiler-drm interface target so that consumers of the target inherit them.
target_include_directories(rocprofiler-drm SYSTEM INTERFACE ${drm_INCLUDE_DIR}
|
||||
${xf86drm_INCLUDE_DIR})
|
||||
target_link_libraries(rocprofiler-drm INTERFACE ${drm_LIBRARY} ${drm_amdgpu_LIBRARY})
|
||||
|
||||
@@ -51,3 +51,4 @@ rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" IN
|
||||
rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL)
|
||||
rocprofiler_add_interface_library(rocprofiler-drm "drm (amdgpu) library" INTERNAL)
|
||||
|
||||
@@ -37,7 +37,7 @@ const std::string_view MI200_NAME = "gfx90a";
|
||||
}
|
||||
|
||||
static rocprofiler_status_t
|
||||
find_first_gpu_agent_impl(rocprofiler_agent_t** agents, size_t num_agents, void* data)
|
||||
find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, void* data)
|
||||
{
|
||||
// data is required
|
||||
if(!data) return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
@@ -18,9 +18,9 @@ std::vector<rocprofiler_context_id_t> contexts;
|
||||
std::vector<rocprofiler_buffer_id_t> buffer_ids;
|
||||
|
||||
rocprofiler_status_t
|
||||
find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_t** agents,
|
||||
size_t num_agents,
|
||||
void* data)
|
||||
find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agents,
|
||||
size_t num_agents,
|
||||
void* data)
|
||||
{
|
||||
// data is required
|
||||
if(!data) return ROCPROFILER_STATUS_ERROR;
|
||||
|
||||
@@ -25,6 +25,10 @@
|
||||
#include <rocprofiler/defines.h>
|
||||
#include <rocprofiler/fwd.h>
|
||||
|
||||
#include <hsakmt/hsakmttypes.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
/**
|
||||
@@ -34,14 +38,147 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Cache information for an agent.
|
||||
*/
|
||||
typedef struct rocprofiler_agent_cache_t
|
||||
{
|
||||
uint64_t processor_id_low; ///< Identifies the processor number
|
||||
uint64_t size; ///< Size of the cache
|
||||
uint32_t level; ///< Integer representing level: 1, 2, 3, 4, etc.
|
||||
uint32_t cache_line_size; ///< Cache line size in bytes
|
||||
uint32_t cache_lines_per_tag; ///< Cache lines per Cache Tag
|
||||
uint32_t association; ///< Cache Associativity
|
||||
uint32_t latency; ///< Cache latency in ns
|
||||
HsaCacheType type; ///< Cache type; HsaCacheType is declared in <hsakmt/hsakmttypes.h>
|
||||
} rocprofiler_agent_cache_t;
|
||||
|
||||
/**
|
||||
* @brief IO link information for an agent.
|
||||
*/
|
||||
typedef struct rocprofiler_agent_io_link_t
|
||||
{
|
||||
HSA_IOLINKTYPE type; ///< Discoverable IoLink Properties (optional)
|
||||
uint32_t version_major; ///< Bus interface major version (optional)
|
||||
uint32_t version_minor; ///< Bus interface minor version (optional)
|
||||
uint32_t node_from; ///< See @ref rocprofiler_agent_id_t
|
||||
uint32_t node_to; ///< See @ref rocprofiler_agent_id_t
|
||||
uint32_t weight; ///< weight factor (derived from CDIT)
|
||||
uint32_t min_latency; ///< minimum cost of time to transfer (rounded to ns)
|
||||
uint32_t max_latency; ///< maximum cost of time to transfer (rounded to ns)
|
||||
uint32_t min_bandwidth; ///< minimum interface Bandwidth in MB/s
|
||||
uint32_t max_bandwidth; ///< maximum interface Bandwidth in MB/s
|
||||
uint32_t recommended_transfer_size; ///< recommended transfer size to reach maximum bandwidth
|
||||
///< in bytes
|
||||
HSA_LINKPROPERTY flags; ///< override flags (may be active for specific platforms)
|
||||
} rocprofiler_agent_io_link_t;
|
||||
|
||||
/**
|
||||
* @brief Memory bank information for an agent.
|
||||
*/
|
||||
typedef struct rocprofiler_agent_mem_bank_t
|
||||
{
|
||||
HSA_HEAPTYPE heap_type; ///< heap type of the memory bank (see HSA_HEAPTYPE)
|
||||
HSA_MEMORYPROPERTY flags; ///< memory property flags (see HSA_MEMORYPROPERTY)
|
||||
uint32_t width; ///< the number of parallel bits of the memory interface
|
||||
uint32_t mem_clk_max; ///< clock for the memory, this allows computing the available
|
||||
///< bandwidth to the memory when needed
|
||||
uint64_t size_in_bytes; ///< physical memory size of the memory range in bytes
|
||||
} rocprofiler_agent_mem_bank_t;
|
||||
|
||||
/**
|
||||
* @brief Multi-dimensional struct of data
|
||||
*/
|
||||
typedef struct rocprofiler_dim3_t
|
||||
{
|
||||
uint32_t x; ///< value for the x dimension
|
||||
uint32_t y; ///< value for the y dimension
|
||||
uint32_t z; ///< value for the z dimension
|
||||
} rocprofiler_dim3_t;
|
||||
|
||||
/**
|
||||
* @brief Agent.
|
||||
*/
|
||||
typedef struct
|
||||
typedef struct rocprofiler_agent_t
|
||||
{
|
||||
rocprofiler_agent_id_t id;
|
||||
rocprofiler_agent_type_t type;
|
||||
const char* name;
|
||||
uint64_t size; ///< set to sizeof(rocprofiler_agent_t) by rocprofiler. This can be used for
|
||||
///< versioning and compatibility handling
|
||||
rocprofiler_agent_id_t id; ///< Internal opaque identifier
|
||||
rocprofiler_agent_type_t type; ///< Enumeration for identifying the agent type (CPU, GPU, etc.)
|
||||
uint32_t cpu_cores_count; ///< # of latency (= CPU) cores present on this HSA node. This value
|
||||
///< is 0 for a HSA node with no such cores, e.g a "discrete HSA GPU"
|
||||
uint32_t simd_count; ///< # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a
|
||||
///< node. This value is 0 if no FCompute cores are present (e.g. pure
|
||||
///< "CPU node").
|
||||
uint32_t mem_banks_count; ///< # of discoverable memory bank affinity properties on this
|
||||
///< "H-NUMA" node.
|
||||
uint32_t caches_count; ///< # of discoverable cache affinity properties on this "H-NUMA" node.
|
||||
uint32_t io_links_count; ///< # of discoverable IO link affinity properties of this node
|
||||
///< connecting to other nodes.
|
||||
uint32_t cpu_core_id_base; ///< low value of the logical processor ID of the latency (= CPU)
|
||||
///< cores available on this node
|
||||
uint32_t simd_id_base; ///< low value of the logical processor ID of the throughput (= GPU)
|
||||
///< units available on this node
|
||||
uint32_t max_waves_per_simd; ///< This identifies the max. number of launched waves per SIMD.
|
||||
///< If NumFComputeCores is 0, this value is ignored.
|
||||
uint32_t lds_size_in_kb; ///< Size of Local Data Store in Kilobytes per SIMD Wavefront
|
||||
uint32_t gds_size_in_kb; ///< Size of Global Data Store in Kilobytes shared across SIMD
|
||||
///< Wavefronts
|
||||
uint32_t num_gws; ///< Number of GWS barriers
|
||||
uint32_t wave_front_size; ///< Number of SIMD cores per wavefront executed, typically 64, may
|
||||
///< be 32 or a different value for some HSA based architectures
|
||||
uint32_t num_xcc; ///< Number of XCC
|
||||
uint32_t cu_count; ///< Number of compute units
|
||||
uint32_t array_count; ///< Number of SIMD arrays
|
||||
uint32_t num_shader_banks; ///< Number of Shader Banks or Shader Engines, typical values are 1
|
||||
///< or 2
|
||||
uint32_t simd_arrays_per_engine; ///< Number of SIMD arrays per engine
|
||||
uint32_t cu_per_simd_array; ///< Number of Compute Units (CU) per SIMD array
|
||||
uint32_t simd_per_cu; ///< Number of SIMD representing a Compute Unit (CU)
|
||||
uint32_t max_slots_scratch_cu; ///< Number of temp. memory ("scratch") wave slots available to
|
||||
///< access, may be 0 if HW has no restrictions
|
||||
uint32_t gfx_target_version; ///< major_version=((value / 10000) % 100)
|
||||
///< minor_version=((value / 100) % 100)
|
||||
///< patch_version=(value % 100)
|
||||
uint16_t vendor_id; ///< GPU vendor id; 0 on latency (= CPU)-only nodes
|
||||
uint16_t device_id; ///< GPU device id; 0 on latency (= CPU)-only nodes
|
||||
uint32_t location_id; ///< GPU BDF (Bus/Device/function number) - identifies the device
|
||||
///< location in the overall system
|
||||
uint32_t domain; ///< PCI domain of the GPU
|
||||
uint32_t drm_render_minor; ///< DRM render device minor device number
|
||||
uint32_t num_sdma_engines; ///< number of PCIe optimized SDMA engines
|
||||
uint32_t num_sdma_xgmi_engines; ///< number of XGMI optimized SDMA engines
|
||||
uint32_t num_sdma_queues_per_engine; ///< number of SDMA queue per one engine
|
||||
uint32_t num_cp_queues; ///< number of Compute queues
|
||||
uint32_t max_engine_clk_ccompute; ///< maximum engine clocks for CPU, including any boost
|
||||
///< capabilities
|
||||
uint32_t max_engine_clk_fcompute; ///< GPU only. Maximum engine clocks for GPU, including any
|
||||
///< boost capabilities
|
||||
HSA_ENGINE_VERSION sdma_fw_version; ///< GPU only
|
||||
HSA_ENGINE_ID
|
||||
fw_version; ///< GPU only. Identifier (rev) of the GPU uEngine or Firmware, may be 0
|
||||
HSA_CAPABILITY capability; ///< GPU only
|
||||
uint32_t cu_per_engine; ///< computed
|
||||
uint32_t max_waves_per_cu; ///< computed
|
||||
uint32_t family_id; ///< Family code
|
||||
uint32_t workgroup_max_size; ///< GPU only. Maximum total number of work-items in a work-group.
|
||||
uint32_t grid_max_size; ///< GPU only. Maximum total number of work-items in a
|
||||
///< grid.
|
||||
uint64_t local_mem_size; ///< GPU only. Local memory size
|
||||
uint64_t hive_id; ///< XGMI Hive the GPU node belongs to in the system. It is an opaque and
|
||||
///< static number hash created by the PSP
|
||||
uint64_t gpu_id; ///< GPU only. KFD identifier
|
||||
rocprofiler_dim3_t workgroup_max_dim; ///< GPU only. Maximum number of work-items of each
|
||||
///< dimension of a work-group.
|
||||
rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension
|
||||
///< of a grid.
|
||||
rocprofiler_agent_mem_bank_t* mem_banks;
|
||||
rocprofiler_agent_cache_t* caches;
|
||||
rocprofiler_agent_io_link_t* io_links;
|
||||
const char* name; ///< Name of the agent. Will be identical to product name for CPU
|
||||
const char* vendor_name; ///< Vendor of agent (will be AMD)
|
||||
const char* product_name; ///< Marketing name
|
||||
const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc.
|
||||
rocprofiler_pc_sampling_config_array_t pc_sampling_configs;
|
||||
} rocprofiler_agent_t;
|
||||
|
||||
@@ -53,9 +190,10 @@ typedef struct
|
||||
* @param [in] user_data Data pointer passback
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(rocprofiler_agent_t** agents,
|
||||
size_t num_agents,
|
||||
void* user_data);
|
||||
typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(
|
||||
const rocprofiler_agent_t** agents,
|
||||
size_t num_agents,
|
||||
void* user_data);
|
||||
|
||||
/**
|
||||
* @brief Receive synchronous callback with an array of available agents at moment of invocation
|
||||
|
||||
@@ -67,6 +67,8 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
///< rocprofiler configuration (i.e.
|
||||
///< function called post-initialization)
|
||||
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, ///< Function is not implemented
|
||||
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, ///< Data structure provided by user is incompatible
|
||||
///< with current version of rocprofiler
|
||||
ROCPROFILER_STATUS_LAST,
|
||||
} rocprofiler_status_t;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ target_link_libraries(
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-amd-comgr>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-runtime>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>)
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-drm>)
|
||||
set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME
|
||||
rocprofiler-common)
|
||||
|
||||
@@ -24,55 +24,570 @@
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/rocprofiler/hsa/agent.hpp"
|
||||
#include <fmt/core.h>
|
||||
#include <glog/logging.h>
|
||||
#include <libdrm/amdgpu.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace agent
|
||||
{
|
||||
namespace
|
||||
{
|
||||
namespace fs = ::std::filesystem;
|
||||
|
||||
// Fields collected for one processor entry of /proc/cpuinfo. Numeric fields start at -1 and
// string fields start empty so that is_valid() can tell which ones were never filled in.
struct cpu_info
{
    long        processor{-1};
    long        family{-1};
    long        model{-1};
    long        physical_id{-1};
    long        core_id{-1};
    long        apicid{-1};
    std::string vendor_id{};
    std::string model_name{};

    // true only when every numeric field is non-negative and both strings are non-empty
    bool is_valid() const
    {
        const bool numbers_set = processor >= 0 && family >= 0 && model >= 0 &&
                                 physical_id >= 0 && core_id >= 0 && apicid >= 0;
        const bool strings_set = !vendor_id.empty() && !model_name.empty();
        return numbers_set && strings_set;
    }
};
|
||||
|
||||
auto
|
||||
parse_cpu_info()
|
||||
{
|
||||
auto ifs = std::ifstream{"/proc/cpuinfo"};
|
||||
auto data = std::vector<cpu_info>{};
|
||||
if(!ifs) return data;
|
||||
|
||||
auto read_blocks = [&ifs]() {
|
||||
auto blocks = std::vector<std::vector<std::string>>{};
|
||||
auto current_block = std::vector<std::string>{};
|
||||
auto line = std::string{};
|
||||
while(std::getline(ifs, line))
|
||||
{
|
||||
if(ifs.eof())
|
||||
{
|
||||
if(!current_block.empty()) blocks.emplace_back(std::move(current_block));
|
||||
break;
|
||||
}
|
||||
|
||||
if(line.empty())
|
||||
{
|
||||
if(!current_block.empty()) blocks.emplace_back(std::move(current_block));
|
||||
current_block.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
current_block.emplace_back(line);
|
||||
}
|
||||
}
|
||||
return blocks;
|
||||
};
|
||||
|
||||
auto processor_blocks = read_blocks();
|
||||
auto processor_info = std::vector<cpu_info>{};
|
||||
processor_info.reserve(processor_blocks.size());
|
||||
|
||||
for(const auto& bitr : processor_blocks)
|
||||
{
|
||||
auto info_v = cpu_info{};
|
||||
for(const auto& itr : bitr)
|
||||
{
|
||||
auto match = std::smatch{};
|
||||
const std::regex re{".*: (.*)$"};
|
||||
if(std::regex_match(itr, match, re))
|
||||
{
|
||||
if(match.size() == 2)
|
||||
{
|
||||
std::ssub_match value = match[1];
|
||||
|
||||
if(itr.find("vendor_id") == 0)
|
||||
info_v.vendor_id = value.str();
|
||||
else if(itr.find("model name") == 0)
|
||||
info_v.model_name = value.str();
|
||||
else if(itr.find("processor") == 0)
|
||||
info_v.processor = std::stol(value.str());
|
||||
else if(itr.find("cpu family") == 0)
|
||||
info_v.family = std::stol(value.str());
|
||||
else if(itr.find("model") == 0 && itr.find("model name") != 0)
|
||||
info_v.model = std::stol(value.str());
|
||||
else if(itr.find("physical id") == 0)
|
||||
info_v.physical_id = std::stol(value.str());
|
||||
else if(itr.find("core id") == 0)
|
||||
info_v.core_id = std::stol(value.str());
|
||||
else if(itr.find("apicid") == 0)
|
||||
info_v.apicid = std::stol(value.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
if(info_v.is_valid())
|
||||
processor_info.emplace_back(info_v);
|
||||
else
|
||||
{
|
||||
LOG(ERROR) << "Invalid processor info: "
|
||||
<< fmt::format("processor={}, vendor={}, family={}, model={}, name={}, "
|
||||
"physical id={}, core id={}, apicid={}",
|
||||
info_v.processor,
|
||||
info_v.vendor_id,
|
||||
info_v.family,
|
||||
info_v.model,
|
||||
info_v.model_name,
|
||||
info_v.physical_id,
|
||||
info_v.core_id,
|
||||
info_v.apicid);
|
||||
}
|
||||
}
|
||||
|
||||
return processor_info;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_cpu_info()
|
||||
{
|
||||
static auto _v = parse_cpu_info();
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto
|
||||
read_file(const std::string& fname)
|
||||
{
|
||||
auto data = std::vector<std::string>{};
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
while(true)
|
||||
{
|
||||
auto value = std::string{};
|
||||
ifs >> value;
|
||||
if(ifs.eof()) break;
|
||||
|
||||
if(!value.empty()) data.emplace_back(value);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
auto
|
||||
read_map(const std::string& fname)
|
||||
{
|
||||
auto data = std::unordered_map<std::string, std::string>{};
|
||||
auto ifs = std::ifstream{fname};
|
||||
if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};
|
||||
|
||||
while(true)
|
||||
{
|
||||
auto label = std::string{};
|
||||
ifs >> label;
|
||||
if(ifs.eof()) break;
|
||||
|
||||
auto entry = std::string{};
|
||||
ifs >> entry;
|
||||
if(ifs.eof())
|
||||
throw std::runtime_error{
|
||||
fmt::format("unexpected file format in '{}' at {}", fname, label)};
|
||||
|
||||
auto ret = data.emplace(label, entry);
|
||||
if(!ret.second)
|
||||
throw std::runtime_error{fmt::format("duplicate entry in '{}': {}", fname, label)};
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename MapT, typename Tp>
|
||||
void
|
||||
read_property(const MapT& data, const std::string& label, Tp& value)
|
||||
{
|
||||
if constexpr(std::is_enum<Tp>::value)
|
||||
{
|
||||
using value_type = std::underlying_type_t<Tp>;
|
||||
// never expect this to be true but it does guard against infinite recursion
|
||||
static_assert(!std::is_enum<value_type>::value, "Expected non-enum type");
|
||||
|
||||
auto value_v = static_cast<value_type>(value);
|
||||
read_property(data, label, value_v);
|
||||
value = static_cast<Tp>(value_v);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::is_integral<Tp>::value, "Expected integral type");
|
||||
using value_type = std::conditional_t<std::is_signed<Tp>::value, intmax_t, uintmax_t>;
|
||||
|
||||
if(data.find(label) == data.end())
|
||||
{
|
||||
LOG(ERROR) << "agent properties map missing " << label << " entry";
|
||||
return;
|
||||
}
|
||||
|
||||
auto iss = std::istringstream{data.at(label)};
|
||||
value_type local_value;
|
||||
iss >> local_value;
|
||||
|
||||
// verify that we have used the correct data sizes
|
||||
constexpr auto min_value = std::numeric_limits<Tp>::min();
|
||||
constexpr auto max_value = std::numeric_limits<Tp>::max();
|
||||
if(local_value < min_value)
|
||||
{
|
||||
throw std::runtime_error{
|
||||
fmt::format("data with label {} has a value (={}) which is less "
|
||||
"than the min value for the type (={})",
|
||||
label,
|
||||
local_value,
|
||||
min_value)};
|
||||
}
|
||||
else if(local_value > max_value)
|
||||
{
|
||||
throw std::runtime_error{fmt::format("data with label {} has a value (={}) which is "
|
||||
"greater "
|
||||
"than the max value for the type (={})",
|
||||
label,
|
||||
local_value,
|
||||
max_value)};
|
||||
}
|
||||
|
||||
value = static_cast<Tp>(local_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Pack a major/minor/patch triple into a single number: major * 10000 + minor * 100 + patch.
constexpr auto
compute_version(uint32_t major_v, uint32_t minor_v, uint32_t patch_v)
{
    // nested form of (major_v * 10000) + (minor_v * 100) + patch_v
    return (((major_v * 100) + minor_v) * 100) + patch_v;
}
|
||||
|
||||
auto
|
||||
read_topology()
|
||||
{
|
||||
using unique_agent_t = std::unique_ptr<rocprofiler_agent_t, void (*)(rocprofiler_agent_t*)>;
|
||||
|
||||
auto sysfs_nodes_path = fs::path{"/sys/class/kfd/kfd/topology/nodes/"};
|
||||
if(!fs::exists(sysfs_nodes_path))
|
||||
throw std::runtime_error{
|
||||
fmt::format("sysfs nodes path '{}' does not exist", sysfs_nodes_path.string())};
|
||||
|
||||
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
|
||||
|
||||
auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
|
||||
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
|
||||
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
|
||||
1UL,
|
||||
1000000000UL,
|
||||
0}};
|
||||
|
||||
const auto& cpu_info_v = get_cpu_info();
|
||||
auto data = std::vector<unique_agent_t>{};
|
||||
uint64_t n = 0;
|
||||
|
||||
while(true)
|
||||
{
|
||||
auto idx = n++;
|
||||
auto node_path = sysfs_nodes_path / std::to_string(idx);
|
||||
if(!fs::exists(node_path)) break;
|
||||
|
||||
auto properties = std::unordered_map<std::string, std::string>{};
|
||||
auto name_prop = std::vector<std::string>{};
|
||||
auto gpu_id_prop = std::vector<std::string>{};
|
||||
try
|
||||
{
|
||||
properties = read_map(node_path / "properties");
|
||||
name_prop = read_file(node_path / "name");
|
||||
gpu_id_prop = read_file(node_path / "gpu_id");
|
||||
} catch(std::runtime_error& e)
|
||||
{
|
||||
LOG(ERROR) << "Error reading '" << (node_path / "properties").string()
|
||||
<< "' :: " << e.what();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto agent_info = rocprofiler_agent_t{};
|
||||
memset(&agent_info, 0, sizeof(agent_info));
|
||||
|
||||
agent_info.size = sizeof(rocprofiler_agent_t);
|
||||
agent_info.id.handle = idx;
|
||||
agent_info.type = ROCPROFILER_AGENT_TYPE_NONE;
|
||||
|
||||
if(!name_prop.empty())
|
||||
agent_info.model_name = strdup(name_prop.front().c_str());
|
||||
else
|
||||
agent_info.model_name = "";
|
||||
|
||||
if(!gpu_id_prop.empty()) agent_info.gpu_id = std::stoull(gpu_id_prop.front());
|
||||
|
||||
read_property(properties, "cpu_cores_count", agent_info.cpu_cores_count);
|
||||
read_property(properties, "simd_count", agent_info.simd_count);
|
||||
|
||||
if(agent_info.cpu_cores_count > 0)
|
||||
agent_info.type = ROCPROFILER_AGENT_TYPE_CPU;
|
||||
else if(agent_info.simd_count > 0)
|
||||
agent_info.type = ROCPROFILER_AGENT_TYPE_GPU;
|
||||
|
||||
read_property(properties, "mem_banks_count", agent_info.mem_banks_count);
|
||||
read_property(properties, "caches_count", agent_info.caches_count);
|
||||
read_property(properties, "io_links_count", agent_info.io_links_count);
|
||||
read_property(properties, "cpu_core_id_base", agent_info.cpu_core_id_base);
|
||||
read_property(properties, "simd_id_base", agent_info.simd_id_base);
|
||||
read_property(properties, "max_waves_per_simd", agent_info.max_waves_per_simd);
|
||||
read_property(properties, "lds_size_in_kb", agent_info.lds_size_in_kb);
|
||||
read_property(properties, "gds_size_in_kb", agent_info.gds_size_in_kb);
|
||||
read_property(properties, "num_gws", agent_info.num_gws);
|
||||
read_property(properties, "wave_front_size", agent_info.wave_front_size);
|
||||
read_property(properties, "array_count", agent_info.array_count);
|
||||
read_property(properties, "simd_arrays_per_engine", agent_info.simd_arrays_per_engine);
|
||||
read_property(properties, "cu_per_simd_array", agent_info.cu_per_simd_array);
|
||||
read_property(properties, "simd_per_cu", agent_info.simd_per_cu);
|
||||
read_property(properties, "max_slots_scratch_cu", agent_info.max_slots_scratch_cu);
|
||||
read_property(properties, "gfx_target_version", agent_info.gfx_target_version);
|
||||
read_property(properties, "vendor_id", agent_info.vendor_id);
|
||||
read_property(properties, "device_id", agent_info.device_id);
|
||||
read_property(properties, "location_id", agent_info.location_id);
|
||||
read_property(properties, "domain", agent_info.domain);
|
||||
read_property(properties, "drm_render_minor", agent_info.drm_render_minor);
|
||||
read_property(properties, "hive_id", agent_info.hive_id);
|
||||
read_property(properties, "num_sdma_engines", agent_info.num_sdma_engines);
|
||||
read_property(properties, "num_sdma_xgmi_engines", agent_info.num_sdma_xgmi_engines);
|
||||
read_property(
|
||||
properties, "num_sdma_queues_per_engine", agent_info.num_sdma_queues_per_engine);
|
||||
read_property(properties, "num_cp_queues", agent_info.num_cp_queues);
|
||||
read_property(properties, "max_engine_clk_ccompute", agent_info.max_engine_clk_ccompute);
|
||||
|
||||
agent_info.name = "";
|
||||
agent_info.product_name = "";
|
||||
agent_info.vendor_name = "";
|
||||
if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
constexpr auto workgrp_max = 1024;
|
||||
constexpr auto grid_max = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
read_property(
|
||||
properties, "max_engine_clk_fcompute", agent_info.max_engine_clk_fcompute);
|
||||
read_property(properties, "local_mem_size", agent_info.local_mem_size);
|
||||
read_property(properties, "fw_version", agent_info.fw_version.Value);
|
||||
read_property(properties, "capability", agent_info.capability.Value);
|
||||
read_property(properties, "sdma_fw_version", agent_info.sdma_fw_version.Value);
|
||||
agent_info.fw_version.Value &= 0x3ff;
|
||||
agent_info.sdma_fw_version.Value &= 0x3ff;
|
||||
agent_info.workgroup_max_size = workgrp_max; // hardcoded in hsa-runtime
|
||||
agent_info.workgroup_max_dim = {workgrp_max, workgrp_max, workgrp_max};
|
||||
agent_info.grid_max_size = grid_max; // hardcoded in hsa-runtime
|
||||
agent_info.grid_max_dim = {grid_max, grid_max, grid_max};
|
||||
agent_info.cu_count = agent_info.simd_count / agent_info.simd_per_cu;
|
||||
|
||||
if(int drm_fd = 0; (drm_fd = drmOpenRender(agent_info.drm_render_minor)) >= 0)
|
||||
{
|
||||
uint32_t major_version = 0;
|
||||
uint32_t minor_version = 0;
|
||||
auto* device_handle = amdgpu_device_handle{};
|
||||
if(amdgpu_device_initialize(
|
||||
drm_fd, &major_version, &minor_version, &device_handle) == 0)
|
||||
{
|
||||
auto major = (agent_info.gfx_target_version / 10000) % 100;
|
||||
auto minor = (agent_info.gfx_target_version / 100) % 100;
|
||||
auto step = (agent_info.gfx_target_version % 100);
|
||||
|
||||
agent_info.name =
|
||||
strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str());
|
||||
agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle));
|
||||
agent_info.vendor_name = strdup("AMD");
|
||||
|
||||
amdgpu_gpu_info gpu_info = {};
|
||||
if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0)
|
||||
{
|
||||
agent_info.family_id = gpu_info.family_id;
|
||||
}
|
||||
amdgpu_device_deinitialize(device_handle);
|
||||
}
|
||||
drmClose(drm_fd);
|
||||
}
|
||||
|
||||
constexpr auto gfx90a_version = compute_version(9, 0, 10);
|
||||
|
||||
if(agent_info.gfx_target_version >= gfx90a_version)
|
||||
{
|
||||
agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{
|
||||
mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()};
|
||||
}
|
||||
}
|
||||
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
{
|
||||
agent_info.cu_count = agent_info.cpu_cores_count;
|
||||
agent_info.vendor_name = strdup("CPU");
|
||||
for(const auto& itr : cpu_info_v)
|
||||
{
|
||||
if(agent_info.cpu_core_id_base == itr.apicid)
|
||||
{
|
||||
agent_info.name = strdup(itr.model_name.c_str());
|
||||
agent_info.product_name = strdup(agent_info.name);
|
||||
agent_info.family_id = itr.family;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(properties.count("num_xcc") > 0)
|
||||
read_property(properties, "num_xcc", agent_info.num_xcc);
|
||||
else
|
||||
agent_info.num_xcc = 1;
|
||||
|
||||
agent_info.max_waves_per_cu = agent_info.simd_per_cu * agent_info.max_waves_per_simd;
|
||||
|
||||
if(agent_info.simd_arrays_per_engine > 0)
|
||||
{
|
||||
agent_info.num_shader_banks =
|
||||
agent_info.array_count / agent_info.simd_arrays_per_engine;
|
||||
|
||||
// depends on above
|
||||
if(agent_info.num_shader_banks * agent_info.simd_arrays_per_engine > 0)
|
||||
{
|
||||
agent_info.cu_per_engine =
|
||||
(agent_info.simd_count / agent_info.simd_per_cu) /
|
||||
(agent_info.num_shader_banks * agent_info.simd_arrays_per_engine);
|
||||
}
|
||||
}
|
||||
|
||||
agent_info.mem_banks = nullptr;
|
||||
agent_info.caches = nullptr;
|
||||
agent_info.io_links = nullptr;
|
||||
|
||||
if(agent_info.mem_banks_count > 0)
|
||||
{
|
||||
agent_info.mem_banks = new rocprofiler_agent_mem_bank_t[agent_info.mem_banks_count];
|
||||
|
||||
for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i)
|
||||
{
|
||||
using heap_type_t = HSA_HEAPTYPE;
|
||||
using underlying_heap_type_t = std::underlying_type_t<heap_type_t>;
|
||||
|
||||
auto subproperties =
|
||||
read_map(node_path / "mem_banks" / std::to_string(i) / "properties");
|
||||
|
||||
auto _heap_type = underlying_heap_type_t{};
|
||||
read_property(subproperties, "heap_type", _heap_type);
|
||||
agent_info.mem_banks[i].heap_type = static_cast<heap_type_t>(_heap_type);
|
||||
|
||||
read_property(
|
||||
subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes);
|
||||
read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty);
|
||||
read_property(subproperties, "width", agent_info.mem_banks[i].width);
|
||||
read_property(subproperties, "mem_clk_max", agent_info.mem_banks[i].mem_clk_max);
|
||||
}
|
||||
}
|
||||
|
||||
if(agent_info.caches_count > 0)
|
||||
{
|
||||
agent_info.caches = new rocprofiler_agent_cache_t[agent_info.caches_count];
|
||||
|
||||
for(uint32_t i = 0; i < agent_info.caches_count; ++i)
|
||||
{
|
||||
auto subproperties =
|
||||
read_map(node_path / "caches" / std::to_string(i) / "properties");
|
||||
|
||||
read_property(
|
||||
subproperties, "processor_id_low", agent_info.caches[i].processor_id_low);
|
||||
read_property(subproperties, "level", agent_info.caches[i].level);
|
||||
read_property(subproperties, "size", agent_info.caches[i].size);
|
||||
read_property(
|
||||
subproperties, "cache_line_size", agent_info.caches[i].cache_line_size);
|
||||
read_property(
|
||||
subproperties, "cache_lines_per_tag", agent_info.caches[i].cache_lines_per_tag);
|
||||
read_property(subproperties, "association", agent_info.caches[i].association);
|
||||
read_property(subproperties, "latency", agent_info.caches[i].latency);
|
||||
read_property(subproperties, "type", agent_info.caches[i].type.Value);
|
||||
}
|
||||
}
|
||||
|
||||
if(agent_info.io_links_count > 0)
|
||||
{
|
||||
agent_info.io_links = new rocprofiler_agent_io_link_t[agent_info.io_links_count];
|
||||
|
||||
for(uint32_t i = 0; i < agent_info.io_links_count; ++i)
|
||||
{
|
||||
auto subproperties =
|
||||
read_map(node_path / "io_links" / std::to_string(i) / "properties");
|
||||
|
||||
read_property(subproperties, "type", agent_info.io_links[i].type);
|
||||
read_property(subproperties, "version_major", agent_info.io_links[i].version_major);
|
||||
read_property(subproperties, "version_minor", agent_info.io_links[i].version_minor);
|
||||
read_property(subproperties, "node_from", agent_info.io_links[i].node_from);
|
||||
read_property(subproperties, "node_to", agent_info.io_links[i].node_to);
|
||||
read_property(subproperties, "weight", agent_info.io_links[i].weight);
|
||||
read_property(subproperties, "min_latency", agent_info.io_links[i].min_latency);
|
||||
read_property(subproperties, "max_latency", agent_info.io_links[i].max_latency);
|
||||
read_property(subproperties, "min_bandwidth", agent_info.io_links[i].min_bandwidth);
|
||||
read_property(subproperties, "max_bandwidth", agent_info.io_links[i].max_bandwidth);
|
||||
read_property(subproperties,
|
||||
"recommended_transfer_size",
|
||||
agent_info.io_links[i].recommended_transfer_size);
|
||||
read_property(subproperties, "flags", agent_info.io_links[i].flags.LinkProperty);
|
||||
}
|
||||
}
|
||||
|
||||
data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) {
|
||||
if(ptr)
|
||||
{
|
||||
auto free_cstring = [](const char*& val) {
|
||||
if(val && ::strnlen(val, 1) > 0) ::free(const_cast<char*>(val));
|
||||
val = "";
|
||||
};
|
||||
|
||||
delete[] ptr->mem_banks;
|
||||
delete[] ptr->caches;
|
||||
delete[] ptr->io_links;
|
||||
free_cstring(ptr->name);
|
||||
free_cstring(ptr->vendor_name);
|
||||
free_cstring(ptr->product_name);
|
||||
free_cstring(ptr->model_name);
|
||||
}
|
||||
delete ptr;
|
||||
});
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_agent_topology()
|
||||
{
|
||||
static auto _v = read_topology();
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace agent
|
||||
} // namespace rocprofiler
|
||||
|
||||
extern "C" {
|
||||
/// Hands the caller pointers to every agent returned by get_agent_topology().
///
/// @param callback   invoked exactly once with the pointer array, the number of agents and
///                   @p user_data; its return value is passed through unchanged
/// @param agent_size sizeof(rocprofiler_agent_t) as compiled into the caller
/// @param user_data  opaque pointer forwarded to @p callback
/// @return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI when the caller's struct is larger than
///         the one this library was built with, otherwise the status returned by @p callback
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
                                   size_t                            agent_size,
                                   void*                             user_data)
{
    // A caller built against a larger struct would read past the end of our agents.
    if(agent_size > sizeof(rocprofiler_agent_t))
    {
        LOG(ERROR) << "rocprofiler_agent_t used by caller is ABI-incompatible with "
                      "rocprofiler_agent_t in rocprofiler";
        return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI;
    }

    auto& agents   = rocprofiler::agent::get_agent_topology();
    auto  pointers = std::vector<const rocprofiler_agent_t*>{};
    pointers.reserve(agents.size());
    for(auto& agent : agents)
    {
        // the topology owns each agent through a smart pointer; expose the raw address only
        pointers.emplace_back(agent.get());
    }

    return callback(pointers.data(), pointers.size(), user_data);
}
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ findViaEnvironment(const std::string& filename)
|
||||
} // namespace
|
||||
|
||||
/// Loads the derived hardware metrics from "derived_counters.xml".
/// The file is located with findViaEnvironment() and parsed with loadXml().
MetricMap
getDerivedHardwareMetrics()
{
    return loadXml(findViaEnvironment("derived_counters.xml"));
}
|
||||
|
||||
@@ -48,7 +48,7 @@ MetricMap
|
||||
getBaseHardwareMetrics();
|
||||
|
||||
MetricMap
|
||||
getDerrivedHardwareMetrics();
|
||||
getDerivedHardwareMetrics();
|
||||
|
||||
} // namespace counters
|
||||
|
||||
@@ -98,4 +98,4 @@ struct formatter<counters::MetricMap>
|
||||
return fmt::format_to(ctx.out(), "{}", out);
|
||||
}
|
||||
};
|
||||
} // namespace fmt
|
||||
} // namespace fmt
|
||||
|
||||
@@ -7,13 +7,13 @@
|
||||
namespace
|
||||
{
|
||||
auto
|
||||
loadTestData(std::unordered_map<std::string, std::vector<std::vector<std::string>>> map)
|
||||
loadTestData(const std::unordered_map<std::string, std::vector<std::vector<std::string>>>& map)
|
||||
{
|
||||
std::unordered_map<std::string, std::vector<counters::Metric>> ret;
|
||||
for(auto& [gfx, dataMap] : map)
|
||||
for(const auto& [gfx, dataMap] : map)
|
||||
{
|
||||
auto& metric_vec = ret.emplace(gfx, std::vector<counters::Metric>{}).first->second;
|
||||
for(auto& data_vec : dataMap)
|
||||
for(const auto& data_vec : dataMap)
|
||||
{
|
||||
metric_vec.emplace_back(
|
||||
data_vec.at(0), data_vec.at(1), data_vec.at(2), data_vec.at(4), data_vec.at(3));
|
||||
@@ -23,7 +23,7 @@ loadTestData(std::unordered_map<std::string, std::vector<std::vector<std::string
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(MetricsTest, BaseMetricLoad)
|
||||
TEST(metrics, base_load)
|
||||
{
|
||||
auto x = counters::getBaseHardwareMetrics();
|
||||
auto test_data = loadTestData(basic_gfx908);
|
||||
@@ -32,11 +32,11 @@ TEST(MetricsTest, BaseMetricLoad)
|
||||
EXPECT_EQ(fmt::format("{}", x["gfx908"]), fmt::format("{}", test_data["gfx908"]));
|
||||
}
|
||||
|
||||
// Verifies that the derived gfx908 metrics returned by getDerivedHardwareMetrics() format to
// the same string as the metrics built from the derrived_gfx908 test table.
TEST(metrics, derived_load)
{
    auto x         = counters::getDerivedHardwareMetrics();
    auto test_data = loadTestData(derrived_gfx908);
    // both sides must contain a gfx908 entry before the contents are compared
    ASSERT_EQ(x.count("gfx908"), 1);
    ASSERT_EQ(test_data.count("gfx908"), 1);
    EXPECT_EQ(fmt::format("{}", x["gfx908"]), fmt::format("{}", test_data["gfx908"]));
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp agent.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp agent.hpp)
|
||||
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp)
|
||||
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp)
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
|
||||
${ROCPROFILER_LIB_HSA_HEADERS})
|
||||
|
||||
|
||||
@@ -1,499 +0,0 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "agent.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
namespace
|
||||
{
|
||||
/// Maps GPU node ids found under /sys/class/kfd/kfd/topology/nodes to a CPU node id they link to.
///
/// Pass 1 finds the smallest node id whose `gpu_id` file holds a positive number; every node id
/// below it is treated as a CPU node (0 means no such node was found). Pass 2 reads each
/// `io_links/<N>/properties` file of the remaining nodes and records the `node_to` value whenever
/// it names one of those CPU nodes. When a node has several such links the last one read wins.
///
/// Missing or unreadable directories/files and non-numeric entries are skipped, so the function
/// returns an empty map instead of throwing when the KFD topology is not available.
///
/// @return node id -> near CPU node id
std::unordered_map<long long, long long>
get_gpu_nodes_near_cpu()
{
    namespace sysfs = std::filesystem;

    const auto nodes_dir = sysfs::path{"/sys/class/kfd/kfd/topology/nodes"};
    const auto dir_end   = sysfs::directory_iterator{};
    auto       near_cpu  = std::unordered_map<long long, long long>{};

    // Node directories are expected to be named with their decimal id; anything else is ignored
    // rather than letting std::stoll throw out of the scan.
    auto parse_id = [](const std::string& text, long long& value) {
        try
        {
            value = std::stoll(text);
            return true;
        } catch(const std::exception&)
        {
            return false;
        }
    };

    // Pass 1: smallest node id reporting a positive gpu_id.
    long long first_gpu_node = 0;
    {
        auto ec = std::error_code{};
        for(auto itr = sysfs::directory_iterator(nodes_dir, ec); !ec && itr != dir_end;
            itr.increment(ec))
        {
            long long node_id = 0;
            if(!parse_id(itr->path().filename().string(), node_id)) continue;

            auto      gpu_id_file = std::ifstream{itr->path() / "gpu_id"};
            long long gpu_id      = 0;
            if(!(gpu_id_file >> gpu_id) || gpu_id <= 0) continue;

            if(first_gpu_node == 0 || node_id < first_gpu_node) first_gpu_node = node_id;
        }
    }

    // Pass 2: for every node at or above that id, look for io_links that point at a CPU node.
    auto ec = std::error_code{};
    for(auto itr = sysfs::directory_iterator(nodes_dir, ec); !ec && itr != dir_end;
        itr.increment(ec))
    {
        long long node_id = 0;
        if(!parse_id(itr->path().filename().string(), node_id)) continue;
        if(node_id < first_gpu_node) continue;

        auto link_ec = std::error_code{};
        for(auto link = sysfs::directory_iterator(itr->path() / "io_links", link_ec);
            !link_ec && link != dir_end;
            link.increment(link_ec))
        {
            auto properties = std::ifstream{link->path() / "properties"};
            auto line       = std::string{};
            // testing the getline result avoids handling the final line twice at EOF
            while(std::getline(properties, line))
            {
                auto      fields = std::stringstream{line};
                auto      key    = std::string{};
                long long target = 0;
                if(!(fields >> key) || key != "node_to") continue;
                if(!(fields >> target)) continue;
                if(target < first_gpu_node) near_cpu[node_id] = target;
            }
        }
    }

    return near_cpu;
}
|
||||
|
||||
// This function checks to see if the provided
|
||||
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
|
||||
// the function adds an additional requirement that the pool have the
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
|
||||
// pools must NOT have this property.
|
||||
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
hsa_status_t
|
||||
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg)
|
||||
{
|
||||
if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
auto [api_ptr, pool_ptr] =
|
||||
*static_cast<std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>*>(data);
|
||||
hsa_amd_segment_t segment;
|
||||
LOG_IF(FATAL,
|
||||
api_ptr->hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) == HSA_STATUS_ERROR)
|
||||
<< "Could not get pool segment";
|
||||
if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flag;
|
||||
LOG_IF(FATAL,
|
||||
api_ptr->hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) == HSA_STATUS_ERROR)
|
||||
<< "Could not get flag value";
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg))
|
||||
{
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
*(pool_ptr) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t
|
||||
FindStandardPool(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
return FindGlobalPool(pool, data, false);
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t
|
||||
FindKernArgPool(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
void
|
||||
init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent)
|
||||
{
|
||||
CHECK(!cpu_agent.isGpu());
|
||||
auto params = std::make_pair(&api, &cpu_agent.cpu_pool);
|
||||
|
||||
auto status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, ¶ms);
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: Command Buffer Pool is not initialized";
|
||||
|
||||
params.second = &cpu_agent.kernarg_pool;
|
||||
status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params));
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: Output Buffer Pool is not initialized";
|
||||
}
|
||||
|
||||
void
|
||||
init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info)
|
||||
{
|
||||
CHECK(agent_info.isGpu());
|
||||
auto params = std::make_pair(&api, &agent_info.gpu_pool);
|
||||
auto status =
|
||||
api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, ¶ms);
|
||||
|
||||
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
|
||||
<< "Error: GPU Pool is not initialized";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Returns the list of agents, which is built by AgentInfo::getAgents() on the first call from
/// API tables that point directly at the HSA runtime entry points, and reused afterwards.
const std::vector<AgentInfo>&
all_agents()
{
    static const auto agents = []() -> std::shared_ptr<const std::vector<AgentInfo>> {
        // only the entries listed here are set; every other table member stays value-initialized
        auto core_api                  = CoreApiTable{};
        core_api.hsa_iterate_agents_fn = hsa_iterate_agents;
        core_api.hsa_agent_get_info_fn = hsa_agent_get_info;

        auto ext_api                                  = AmdExtTable{};
        ext_api.hsa_amd_memory_pool_get_info_fn       = hsa_amd_memory_pool_get_info;
        ext_api.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
        ext_api.hsa_amd_memory_pool_allocate_fn       = hsa_amd_memory_pool_allocate;
        ext_api.hsa_amd_memory_pool_free_fn           = hsa_amd_memory_pool_free;
        ext_api.hsa_amd_agents_allow_access_fn        = hsa_amd_agents_allow_access;

        return AgentInfo::getAgents(core_api, ext_api);
    }();
    return *agents;
}
|
||||
|
||||
std::shared_ptr<const std::vector<AgentInfo>>
|
||||
AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api)
|
||||
{
|
||||
std::vector<hsa_agent_t> agents;
|
||||
std::shared_ptr<std::vector<AgentInfo>> agent_info_ptr =
|
||||
std::make_shared<std::vector<AgentInfo>>();
|
||||
auto& agent_info = *agent_info_ptr;
|
||||
|
||||
api.hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* data) {
|
||||
CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
|
||||
auto near_gpu_map = get_gpu_nodes_near_cpu();
|
||||
std::unordered_map<int64_t, AgentInfo*> cpu_id_to_agent;
|
||||
|
||||
// Reserve is required to prevent reallocation (which breaks cpu_id_to_agent)
|
||||
agent_info.reserve(agents.size());
|
||||
for(auto& agent : agents)
|
||||
{
|
||||
auto& new_agent = agent_info.emplace_back(agent, api);
|
||||
if(!new_agent.isGpu())
|
||||
{
|
||||
uint32_t cpu_numa_node_id;
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch numa info";
|
||||
new_agent.setNumaNode(cpu_numa_node_id);
|
||||
cpu_id_to_agent[cpu_numa_node_id] = &new_agent;
|
||||
init_cpu_pool(ext_api, new_agent);
|
||||
}
|
||||
else if(new_agent.isGpu())
|
||||
{
|
||||
uint32_t node_id;
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(
|
||||
agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&node_id) != HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch driver node id";
|
||||
new_agent.setIndex(node_id);
|
||||
LOG_IF(FATAL,
|
||||
api.hsa_agent_get_info_fn(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NODE),
|
||||
&node_id) != HSA_STATUS_SUCCESS)
|
||||
<< "Could not fetch driver node id";
|
||||
new_agent.setNumaNode(node_id);
|
||||
init_gpu_pool(ext_api, new_agent);
|
||||
}
|
||||
}
|
||||
|
||||
// Sperate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the first
|
||||
// NUMA nodes)
|
||||
for(auto& agent : agent_info)
|
||||
{
|
||||
if(agent.isGpu())
|
||||
{
|
||||
auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode());
|
||||
LOG_IF(FATAL, !near_gpu) << fmt::format("No CPU Agent near GPU Agent: {} {}", agent);
|
||||
|
||||
auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu);
|
||||
LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu);
|
||||
agent.setNearCpuAgent((*id_to_agent)->getAgent());
|
||||
agent.cpu_pool = (*id_to_agent)->cpu_pool;
|
||||
agent.kernarg_pool = (*id_to_agent)->kernarg_pool;
|
||||
}
|
||||
}
|
||||
return agent_info_ptr;
|
||||
}
|
||||
|
||||
/// Queries the HSA runtime for the properties of @p agent.
///
/// The device type and name are read for every agent; gfxip_ is the name with its last two
/// characters removed (empty when the name has two characters or fewer). The remaining
/// hardware and PCI properties are only queried for GPU agents.
///
/// @param agent HSA agent to describe
/// @param table core API table whose hsa_agent_get_info_fn performs the queries
AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table)
: handle_(agent.handle)
, agent_(agent)
{
    if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info failed";
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_);
    // guarantee termination before measuring the name
    name_[sizeof(name_) - 1] = '\0';

    // strlen(name_) - 2 would wrap around for names shorter than two characters
    const size_t name_len = strlen(name_);
    const size_t gfxip_label_len =
        (name_len > 2) ? std::min(name_len - 2, sizeof(gfxip_) - 1) : size_t{0};
    memcpy(gfxip_, name_, gfxip_label_len);
    gfxip_[gfxip_label_len] = '\0';

    if(type_ != HSA_DEVICE_TYPE_GPU)
    {
        return;
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);

    if(table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
           &shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
           &waves_per_cu_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed";
    }

    // The unchecked queries above may leave their outputs at zero; dividing by zero is undefined
    // behavior, so the derived values keep their zero defaults in that case.
    const uint32_t shader_arrays = se_num_ * shader_arrays_per_se_;
    if(shader_arrays > 0) compute_units_per_sh_ = cu_num_ / shader_arrays;
    if(simds_per_cu_ > 0) wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;

    if(table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN),
                                   &pci_domain_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID),
                                   &pci_location_id_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for PCI info failed";
    }
}
|
||||
|
||||
uint64_t
|
||||
AgentInfo::getIndex() const
|
||||
{
|
||||
return index_;
|
||||
}
|
||||
|
||||
hsa_device_type_t
|
||||
AgentInfo::getType() const
|
||||
{
|
||||
return type_;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
AgentInfo::getHandle() const
|
||||
{
|
||||
return handle_;
|
||||
}
|
||||
|
||||
const std::string_view
|
||||
AgentInfo::getName() const
|
||||
{
|
||||
return name_;
|
||||
}
|
||||
|
||||
std::string
|
||||
AgentInfo::getGfxip() const
|
||||
{
|
||||
return std::string(gfxip_);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxWaveSize() const
|
||||
{
|
||||
return max_wave_size_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxQueueSize() const
|
||||
{
|
||||
return max_queue_size_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getCUCount() const
|
||||
{
|
||||
return cu_num_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getSimdCountPerCU() const
|
||||
{
|
||||
return simds_per_cu_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getShaderEngineCount() const
|
||||
{
|
||||
return se_num_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getShaderArraysPerSE() const
|
||||
{
|
||||
return shader_arrays_per_se_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getMaxWavesPerCU() const
|
||||
{
|
||||
return waves_per_cu_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getCUCountPerSH() const
|
||||
{
|
||||
return compute_units_per_sh_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getWaveSlotsPerSimd() const
|
||||
{
|
||||
return wave_slots_per_simd_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getPCIDomain() const
|
||||
{
|
||||
return pci_domain_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getPCILocationID() const
|
||||
{
|
||||
return pci_location_id_;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getXccCount() const
|
||||
{
|
||||
return xcc_num_;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setIndex(uint64_t index)
|
||||
{
|
||||
index_ = index;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setType(hsa_device_type_t type)
|
||||
{
|
||||
type_ = type;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setHandle(uint64_t handle)
|
||||
{
|
||||
handle_ = handle;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setName(const std::string& name)
|
||||
{
|
||||
constexpr auto name_len = sizeof(name_) / sizeof(char);
|
||||
//
|
||||
// char* strncpy(char* destination, const char* source, size_t num)
|
||||
//
|
||||
// If the end of the source string (which is signaled by a null-character) is found before num
|
||||
// characters have been copied, destination is padded with zeros until a total of num characters
|
||||
// have been written to it
|
||||
strncpy(name_, name.c_str(), name_len - 2);
|
||||
// ensure always terminated
|
||||
name_[name_len - 1] = '\0';
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setNumaNode(uint32_t numa_node)
|
||||
{
|
||||
numa_node_ = numa_node;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AgentInfo::getNumaNode() const
|
||||
{
|
||||
return numa_node_;
|
||||
}
|
||||
|
||||
void
|
||||
AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent)
|
||||
{
|
||||
near_cpu_agent_ = near_cpu_agent;
|
||||
}
|
||||
|
||||
hsa_agent_t
|
||||
AgentInfo::getNearCpuAgent()
|
||||
{
|
||||
return near_cpu_agent_;
|
||||
}
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
@@ -1,174 +0,0 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include "fmt/core.h"
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace hsa
|
||||
{
|
||||
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
|
||||
|
||||
class AgentInfo
|
||||
{
|
||||
public:
|
||||
AgentInfo() = default;
|
||||
AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table);
|
||||
uint64_t getIndex() const;
|
||||
hsa_device_type_t getType() const;
|
||||
bool isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; }
|
||||
bool isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; }
|
||||
uint64_t getHandle() const;
|
||||
const std::string_view getName() const;
|
||||
const char* getNameChar() const { return name_; }
|
||||
std::string getGfxip() const;
|
||||
uint32_t getMaxWaveSize() const;
|
||||
uint32_t getMaxQueueSize() const;
|
||||
uint32_t getCUCount() const;
|
||||
uint32_t getSimdCountPerCU() const;
|
||||
uint32_t getShaderEngineCount() const;
|
||||
uint32_t getShaderArraysPerSE() const;
|
||||
uint32_t getMaxWavesPerCU() const;
|
||||
uint32_t getCUCountPerSH() const;
|
||||
uint32_t getWaveSlotsPerSimd() const;
|
||||
uint32_t getPCIDomain() const;
|
||||
uint32_t getPCILocationID() const;
|
||||
uint32_t getXccCount() const;
|
||||
|
||||
void setIndex(uint64_t index);
|
||||
void setType(hsa_device_type_t type);
|
||||
void setHandle(uint64_t handle);
|
||||
void setName(const std::string& name);
|
||||
|
||||
void setNumaNode(uint32_t numa_node);
|
||||
uint32_t getNumaNode() const;
|
||||
|
||||
void setNearCpuAgent(hsa_agent_t near_cpu_agent);
|
||||
hsa_agent_t getNearCpuAgent();
|
||||
hsa_agent_t getAgent() const { return agent_; }
|
||||
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
|
||||
static std::shared_ptr<const std::vector<AgentInfo>> getAgents(const CoreApiTable&,
|
||||
const AmdExtTable&);
|
||||
|
||||
// Keep move constuctors (i.e. std::move())
|
||||
AgentInfo(AgentInfo&& other) noexcept = default;
|
||||
AgentInfo& operator=(AgentInfo&& other) noexcept = default;
|
||||
|
||||
// Do not allow copying this class
|
||||
AgentInfo(const AgentInfo&) = delete;
|
||||
AgentInfo& operator=(const AgentInfo&) = delete;
|
||||
|
||||
private:
|
||||
uint64_t index_ = 0;
|
||||
hsa_device_type_t type_ = HSA_DEVICE_TYPE_CPU; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint64_t handle_ = 0;
|
||||
char name_[64] = {'\0'};
|
||||
char gfxip_[64] = {'\0'};
|
||||
uint32_t max_wave_size_ = 0;
|
||||
uint32_t max_queue_size_ = 0;
|
||||
uint32_t cu_num_ = 0;
|
||||
uint32_t simds_per_cu_ = 0;
|
||||
uint32_t se_num_ = 0;
|
||||
uint32_t shader_arrays_per_se_ = 0;
|
||||
uint32_t waves_per_cu_ = 0;
|
||||
// CUs per SH/SA
|
||||
uint32_t compute_units_per_sh_ = 0;
|
||||
uint32_t wave_slots_per_simd_ = 0;
|
||||
// Number of XCCs on the GPU
|
||||
uint32_t xcc_num_ = 0;
|
||||
|
||||
uint32_t pci_domain_ = 0;
|
||||
uint32_t pci_location_id_ = 0;
|
||||
|
||||
uint32_t numa_node_ = 0;
|
||||
hsa_agent_t near_cpu_agent_ = {};
|
||||
hsa_agent_t agent_ = {};
|
||||
};
|
||||
|
||||
const std::vector<AgentInfo>&
|
||||
all_agents();
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
namespace fmt
|
||||
{
|
||||
template <>
|
||||
struct formatter<rocprofiler::hsa::AgentInfo>
|
||||
{
|
||||
template <typename ParseContext>
|
||||
constexpr auto parse(ParseContext& ctx)
|
||||
{
|
||||
return ctx.begin();
|
||||
}
|
||||
|
||||
template <typename Ctx>
|
||||
auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const
|
||||
{
|
||||
auto device_type_name = [](auto dev) -> std::string_view {
|
||||
switch(dev)
|
||||
{
|
||||
case HSA_DEVICE_TYPE_CPU: return "CPU";
|
||||
case HSA_DEVICE_TYPE_GPU: return "GPU";
|
||||
case HSA_DEVICE_TYPE_DSP: return "DSP";
|
||||
}
|
||||
return "UNKNOWN";
|
||||
};
|
||||
|
||||
return fmt::format_to(
|
||||
ctx.out(),
|
||||
R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})",
|
||||
agent.getIndex(),
|
||||
device_type_name(agent.getType()),
|
||||
agent.getHandle(),
|
||||
agent.getName(),
|
||||
agent.getGfxip(),
|
||||
agent.getMaxWaveSize(),
|
||||
agent.getMaxQueueSize(),
|
||||
agent.getCUCount(),
|
||||
agent.getSimdCountPerCU(),
|
||||
agent.getShaderEngineCount(),
|
||||
agent.getShaderArraysPerSE(),
|
||||
agent.getMaxWavesPerCU(),
|
||||
agent.getCUCountPerSH(),
|
||||
agent.getWaveSlotsPerSimd(),
|
||||
agent.getPCIDomain(),
|
||||
agent.getPCILocationID(),
|
||||
agent.getXccCount());
|
||||
}
|
||||
};
|
||||
} // namespace fmt
|
||||
@@ -100,28 +100,6 @@ get_forced_configure()
|
||||
return _v;
|
||||
}
|
||||
|
||||
void
|
||||
init_logging()
|
||||
{
|
||||
static auto _once = std::once_flag{};
|
||||
std::call_once(_once, []() {
|
||||
auto get_argv0 = []() {
|
||||
auto ifs = std::ifstream{"/proc/self/cmdline"};
|
||||
auto sarg = std::string{};
|
||||
while(ifs && !ifs.eof())
|
||||
{
|
||||
ifs >> sarg;
|
||||
if(!sarg.empty()) break;
|
||||
}
|
||||
return sarg;
|
||||
};
|
||||
|
||||
static auto argv0 = get_argv0();
|
||||
google::InitGoogleLogging(argv0.c_str());
|
||||
LOG(INFO) << "logging initialized";
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
get_link_map()
|
||||
{
|
||||
@@ -397,6 +375,28 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
init_logging()
|
||||
{
|
||||
static auto _once = std::once_flag{};
|
||||
std::call_once(_once, []() {
|
||||
auto get_argv0 = []() {
|
||||
auto ifs = std::ifstream{"/proc/self/cmdline"};
|
||||
auto sarg = std::string{};
|
||||
while(ifs && !ifs.eof())
|
||||
{
|
||||
ifs >> sarg;
|
||||
if(!sarg.empty()) break;
|
||||
}
|
||||
return sarg;
|
||||
};
|
||||
|
||||
static auto argv0 = get_argv0();
|
||||
google::InitGoogleLogging(argv0.c_str());
|
||||
LOG(INFO) << "logging initialized";
|
||||
});
|
||||
}
|
||||
|
||||
uint32_t
|
||||
get_client_offset()
|
||||
{
|
||||
|
||||
@@ -54,6 +54,10 @@ namespace rocprofiler
|
||||
{
|
||||
namespace registration
|
||||
{
|
||||
// initialize google logging
|
||||
void
|
||||
init_logging();
|
||||
|
||||
// initialize the clients
|
||||
void
|
||||
initialize();
|
||||
|
||||
@@ -24,22 +24,6 @@
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler/context/context.hpp"
|
||||
#include "lib/rocprofiler/context/domain.hpp"
|
||||
#include "lib/rocprofiler/hsa/agent.hpp"
|
||||
#include "lib/rocprofiler/hsa/hsa.hpp"
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
// Accepts any number of arguments of any type and does nothing with them.
template <typename... Args>
auto
consume_args(Args&&... /*ignored*/)
{
    // intentionally empty
}
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
rocprofiler_status_t
|
||||
|
||||
@@ -11,10 +11,10 @@ include(GoogleTest)
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
set(rocprofiler_lib_sources buffer.cpp)
|
||||
set(rocprofiler_lib_sources agent.cpp buffer.cpp timestamp.cpp version.cpp)
|
||||
|
||||
add_executable(rocprofiler-lib-tests)
|
||||
target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources})
|
||||
target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources} details/agent.cpp)
|
||||
target_link_libraries(
|
||||
rocprofiler-lib-tests
|
||||
PRIVATE rocprofiler::rocprofiler-static-library
|
||||
|
||||
@@ -0,0 +1,217 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <rocprofiler/agent.h>
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/registration.h>
|
||||
|
||||
#include "lib/rocprofiler/registration.hpp"
|
||||
#include "lib/rocprofiler/tests/details/agent.hpp"
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <typeinfo>
|
||||
|
||||
// Pins the binary layout of rocprofiler_agent_t: the byte offset of every existing field
// and the total size of the struct are compared against fixed expected values.
TEST(rocprofiler_lib, agent_abi)
{
    constexpr const char* abi_msg = "ABI break. NEW FIELDS MAY ONLY BE ADDED AT END OF STRUCT";

    EXPECT_EQ(offsetof(rocprofiler_agent_t, size), 0) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, id), 8) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, type), 16) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_cores_count), 20) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_count), 24) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks_count), 28) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, caches_count), 32) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links_count), 36) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_core_id_base), 40) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_id_base), 44) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_simd), 48) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, lds_size_in_kb), 52) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, gds_size_in_kb), 56) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_gws), 60) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, wave_front_size), 64) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_xcc), 68) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_count), 72) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, array_count), 76) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_shader_banks), 80) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_arrays_per_engine), 84) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_simd_array), 88) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_per_cu), 92) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_slots_scratch_cu), 96) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, gfx_target_version), 100) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_id), 104) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, device_id), 106) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, location_id), 108) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, domain), 112) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, drm_render_minor), 116) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_engines), 120) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_xgmi_engines), 124) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_queues_per_engine), 128) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, num_cp_queues), 132) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_ccompute), 136) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_fcompute), 140) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, sdma_fw_version), 144) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, fw_version), 148) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, capability), 152) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_engine), 156) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_cu), 160) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, family_id), 164) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_size), 168) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_size), 172) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, local_mem_size), 176) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, hive_id), 184) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, gpu_id), 192) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_dim), 200) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_dim), 212) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks), 224) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, caches), 232) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links), 240) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, name), 248) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << abi_msg;
    EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << abi_msg;
    // Checks for newly added fields go directly above this line. Existing expected
    // offsets must NOT be modified!

    // When a field is appended, grow this value by the size of the new field(s)
    EXPECT_EQ(sizeof(rocprofiler_agent_t), 296)
        << "ABI break. If you added a new field, make sure that this is the only new check that "
           "failed. Please add a check for the new field at the offset and update this test to the "
           "new size";
}
|
||||
|
||||
// Compares the agents reported by rocprofiler_query_available_agents() against the agent
// information gathered through the HSA runtime by rocprofiler::test::get_info().
TEST(rocprofiler_lib, agent)
{
    rocprofiler::registration::init_logging();

    // Run rocminfo and dump the KFD topology "properties" files into the test output
    auto info_ret = std::system("/usr/bin/rocminfo");
    EXPECT_EQ(info_ret, 0);

    auto sys_ret = std::system(
        "/bin/bash -c 'for i in $(find /sys/class/kfd/kfd/topology/nodes -maxdepth 2 -type f | "
        "grep properties | sort); do echo -e \"\n##### ${i} #####\n\"; cat ${i}; echo \"\"; done'");
    EXPECT_EQ(sys_ret, 0);

    // Collect every agent handed to the callback into `agents`
    auto                              agents = std::vector<const rocprofiler_agent_t*>{};
    rocprofiler_available_agents_cb_t iterate_cb =
        [](const rocprofiler_agent_t** agents_arr, size_t num_agents, void* user_data) {
            auto* agents_v = static_cast<std::vector<const rocprofiler_agent_t*>*>(user_data);
            for(size_t i = 0; i < num_agents; ++i)
            {
                const auto* agent = agents_arr[i];
                agents_v->emplace_back(agent);
            }
            return ROCPROFILER_STATUS_SUCCESS;
        };

    // `agents` is a non-const object, so its address converts to void* without any cast
    auto status =
        rocprofiler_query_available_agents(iterate_cb, sizeof(rocprofiler_agent_t), &agents);

    EXPECT_EQ(status, ROCPROFILER_STATUS_SUCCESS);

    auto _rocm_info = rocprofiler::test::rocm_info{};
    EXPECT_EQ(rocprofiler::test::get_info(_rocm_info), 0);

    // Free the ISA name buffers (allocated with new[] while gathering the info) right away:
    // nothing below reads them, and doing it here means the early return taken by a failing
    // ASSERT_EQ can no longer skip the cleanup.
    for(auto& itr : _rocm_info.isas)
    {
        delete[] itr.name_str;
        itr.name_str = nullptr;
    }

    auto& hsa_agents_v = _rocm_info.agents;

    ASSERT_EQ(agents.size(), hsa_agents_v.size());
    for(size_t i = 0; i < agents.size(); ++i)
    {
        const auto* agent = agents.at(i);

        // Context appended to every expectation failure for this agent
        auto msg = fmt::format("name={}, model={}, gfx version={}, id={}, type={}",
                               agent->name,
                               agent->model_name,
                               agent->gfx_target_version,
                               agent->id.handle,
                               agent->type == ROCPROFILER_AGENT_TYPE_CPU ? "CPU" : "GPU");

        EXPECT_LT(i, hsa_agents_v.size()) << msg;
        if(i >= hsa_agents_v.size()) continue;

        auto* hsa_agent = &hsa_agents_v.at(i);

        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU)
        {
            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_CPU) << msg;
        }
        else if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_GPU) << msg;
        }
        else
        {
            EXPECT_TRUE(false) << msg << " :: agent-type != CPU|GPU :: " << agent->type;
        }

        EXPECT_EQ(std::string_view{agent->name}, std::string_view{hsa_agent->name}) << msg;
        EXPECT_EQ(std::string_view{agent->vendor_name}, std::string_view{hsa_agent->vendor_name})
            << msg;
        EXPECT_EQ(std::string_view{agent->product_name},
                  std::string_view{hsa_agent->device_mkt_name})
            << msg;
        EXPECT_EQ(agent->simd_count, hsa_agent->compute_unit * hsa_agent->simds_per_cu) << msg;
        EXPECT_EQ(agent->cu_count, hsa_agent->compute_unit) << msg;
        EXPECT_EQ(agent->simd_per_cu, hsa_agent->simds_per_cu) << msg;
        EXPECT_EQ(agent->wave_front_size, hsa_agent->wavefront_size) << msg;
        EXPECT_EQ(agent->simd_arrays_per_engine, hsa_agent->shader_arrs_per_sh_eng) << msg;
        EXPECT_EQ(agent->max_waves_per_cu, hsa_agent->max_waves_per_cu) << msg;
        EXPECT_EQ(agent->num_shader_banks, hsa_agent->shader_engs) << msg;
        EXPECT_EQ(agent->workgroup_max_size, hsa_agent->workgroup_max_size) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.x, hsa_agent->workgroup_max_dim[0]) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.y, hsa_agent->workgroup_max_dim[1]) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.z, hsa_agent->workgroup_max_dim[2]) << msg;
        EXPECT_EQ(agent->grid_max_size, hsa_agent->grid_max_size) << msg;
        EXPECT_EQ(agent->grid_max_dim.x, hsa_agent->grid_max_dim.x) << msg;
        EXPECT_EQ(agent->grid_max_dim.y, hsa_agent->grid_max_dim.y) << msg;
        EXPECT_EQ(agent->grid_max_dim.z, hsa_agent->grid_max_dim.z) << msg;
        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            // HSA lib doesn't set family ID for CPU-only but we do
            EXPECT_EQ(agent->family_id, hsa_agent->family_id) << msg;
        }
        EXPECT_EQ(agent->fw_version.ui32.uCode, hsa_agent->ucode_version) << msg;
        EXPECT_EQ(agent->sdma_fw_version.uCodeSDMA, hsa_agent->sdma_ucode_version) << msg;

        // guard the division below against agents that report zero shader engines
        if(hsa_agent->shader_engs > 0)
        {
            EXPECT_EQ(agent->cu_per_engine, hsa_agent->compute_unit / hsa_agent->shader_engs)
                << msg;
        }
    }
}
|
||||
@@ -0,0 +1,478 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "agent.hpp"
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include <grp.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Same as RET_IF_HSA_ERR, but CheckInitError() is invoked first to print diagnostics about
// a failed initialization. `err` is evaluated exactly once, so passing a call expression
// such as hsa_init() does not repeat the call.
#define RET_IF_HSA_INIT_ERR(err)                                                                   \
    do                                                                                             \
    {                                                                                              \
        const auto ret_if_hsa_init_status_ = (err);                                                \
        if(ret_if_hsa_init_status_ != HSA_STATUS_SUCCESS)                                          \
        {                                                                                          \
            CheckInitError();                                                                      \
            RET_IF_HSA_ERR(ret_if_hsa_init_status_);                                               \
        }                                                                                          \
    } while(false)

// If `err` is not HSA_STATUS_SUCCESS: print the call site and the status description (or
// the status value in hex when hsa_status_string() cannot describe it) and return the status
// from the enclosing function. `err` is evaluated exactly once; the do/while(false) wrapper
// makes the macro behave as a single statement (safe in an unbraced if/else).
#define RET_IF_HSA_ERR(err)                                                                        \
    do                                                                                             \
    {                                                                                              \
        const auto ret_if_hsa_status_ = (err);                                                     \
        if(ret_if_hsa_status_ != HSA_STATUS_SUCCESS)                                               \
        {                                                                                          \
            char        ret_if_hsa_errval_[12];                                                    \
            const char* ret_if_hsa_errstr_ = nullptr;                                              \
            if(hsa_status_string(ret_if_hsa_status_, &ret_if_hsa_errstr_) != HSA_STATUS_SUCCESS)   \
            {                                                                                      \
                snprintf(ret_if_hsa_errval_,                                                       \
                         sizeof(ret_if_hsa_errval_),                                               \
                         "%#x",                                                                    \
                         static_cast<unsigned int>(ret_if_hsa_status_));                           \
                ret_if_hsa_errstr_ = ret_if_hsa_errval_;                                           \
            }                                                                                      \
            printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__);                        \
            printf("Call returned %s\n", ret_if_hsa_errstr_);                                      \
            return (ret_if_hsa_status_);                                                           \
        }                                                                                          \
    } while(false)
|
||||
|
||||
// namespace fs = std::filesystem;
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace test
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Acquire system information
|
||||
// Query the HSA runtime for system-wide attributes and store them in *sys_info.
// The first failing query is reported and returned by RET_IF_HSA_ERR; otherwise the
// status of the final query is returned.
hsa_status_t
AcquireSystemInfo(system_info_t* sys_info)
{
    // Runtime version: major first, then minor
    hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &sys_info->major);
    RET_IF_HSA_ERR(status);
    status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor);
    RET_IF_HSA_ERR(status);

    // Timestamp frequency
    status =
        hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_info->timestamp_frequency);
    RET_IF_HSA_ERR(status);

    // Maximum duration of a signal wait operation
    status = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &sys_info->max_wait);
    RET_IF_HSA_ERR(status);

    // Endianness of the system
    status = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &sys_info->endianness);
    RET_IF_HSA_ERR(status);

    // Machine model
    status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &sys_info->machine_model);
    RET_IF_HSA_ERR(status);

    return status;
}
|
||||
|
||||
// Fill *agent_i with the attributes of `agent` obtained through hsa_agent_get_info().
// The ISA is queried only for GPU agents and the dispatch-related attributes only for
// agents that advertise HSA_AGENT_FEATURE_KERNEL_DISPATCH. The first failing query is
// reported and returned by RET_IF_HSA_ERR; otherwise the status of the last query is returned.
hsa_status_t
AcquireAgentInfoEntry(hsa_agent_t agent, agent_info_t* agent_i)
{
    // Agent name and vendor
    hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name);
    RET_IF_HSA_ERR(status);
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &agent_i->vendor_name);
    RET_IF_HSA_ERR(status);

    // Device marketing name
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_PRODUCT_NAME),
                                &agent_i->device_mkt_name);
    RET_IF_HSA_ERR(status);

    // Agent feature
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_i->agent_feature);
    RET_IF_HSA_ERR(status);

    // Profile supported by the agent
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_i->agent_profile);
    RET_IF_HSA_ERR(status);

    // Floating-point rounding mode
    status = hsa_agent_get_info(
        agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &agent_i->float_rounding_mode);
    RET_IF_HSA_ERR(status);

    // Maximum number of queues
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &agent_i->max_queue);
    RET_IF_HSA_ERR(status);

    // Queue minimum size
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &agent_i->queue_min_size);
    RET_IF_HSA_ERR(status);

    // Queue maximum size
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_i->queue_max_size);
    RET_IF_HSA_ERR(status);

    // Queue type
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &agent_i->queue_type);
    RET_IF_HSA_ERR(status);

    // Agent node
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node);
    RET_IF_HSA_ERR(status);

    // Device type
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_i->device_type);
    RET_IF_HSA_ERR(status);

    // The ISA is requested for GPU agents only
    if(agent_i->device_type == HSA_DEVICE_TYPE_GPU)
    {
        status = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_i->agent_isa);
        RET_IF_HSA_ERR(status);
    }

    // Cache size
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, agent_i->cache_size);
    RET_IF_HSA_ERR(status);

    // Chip id
    status = hsa_agent_get_info(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID), &agent_i->chip_id);
    RET_IF_HSA_ERR(status);

    // Cacheline size
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CACHELINE_SIZE),
                                &agent_i->cacheline_size);
    RET_IF_HSA_ERR(status);

    // Maximum clock frequency
    status =
        hsa_agent_get_info(agent,
                           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY),
                           &agent_i->max_clock_freq);
    RET_IF_HSA_ERR(status);

    // Internal driver node ID
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
                                &agent_i->internal_node_id);
    RET_IF_HSA_ERR(status);

    // Maximum number of watch points on memory address ranges to generate exception events
    status = hsa_agent_get_info(
        agent,
        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS),
        &agent_i->max_addr_watch_pts);
    RET_IF_HSA_ERR(status);

    // Agent BDFID
    status = hsa_agent_get_info(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID), &agent_i->bdf_id);
    RET_IF_HSA_ERR(status);

    // Max memory clock: not supported by hsa_agent_get_info, query intentionally left disabled
    // status = hsa_agent_get_info(agent,
    //                             (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY,
    //                             &agent_i->mem_max_freq);
    // RET_IF_HSA_ERR(status);

    // Number of SIMDs per CU
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
                                &agent_i->simds_per_cu);
    RET_IF_HSA_ERR(status);

    // Number of shader engines
    status =
        hsa_agent_get_info(agent,
                           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
                           &agent_i->shader_engs);
    RET_IF_HSA_ERR(status);

    // Number of shader arrays per shader engine
    status = hsa_agent_get_info(
        agent,
        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
        &agent_i->shader_arrs_per_sh_eng);
    RET_IF_HSA_ERR(status);

    // Number of compute units
    status =
        hsa_agent_get_info(agent,
                           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
                           &agent_i->compute_unit);
    RET_IF_HSA_ERR(status);

    // Family id
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID),
                                &agent_i->family_id);
    RET_IF_HSA_ERR(status);

    // ucode version
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_UCODE_VERSION),
                                &agent_i->ucode_version);
    RET_IF_HSA_ERR(status);

    // sdma ucode version
    status =
        hsa_agent_get_info(agent,
                           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION),
                           &agent_i->sdma_ucode_version);
    RET_IF_HSA_ERR(status);

    // Everything below is queried for kernel (dispatch-capable) agents only
    if((agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) == 0) return status;

    // Flag of fast_f16 operation
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &agent_i->fast_f16);
    RET_IF_HSA_ERR(status);

    // Wavefront size
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_i->wavefront_size);
    RET_IF_HSA_ERR(status);

    // Maximum total number of work-items in a workgroup
    status =
        hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &agent_i->workgroup_max_size);
    RET_IF_HSA_ERR(status);

    // Maximum number of work-items of each dimension of a work-group
    status =
        hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &agent_i->workgroup_max_dim);
    RET_IF_HSA_ERR(status);

    // Maximum number of a grid per dimension
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &agent_i->grid_max_dim);
    RET_IF_HSA_ERR(status);

    // Maximum total number of work-items in a grid
    status = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &agent_i->grid_max_size);
    RET_IF_HSA_ERR(status);

    // Maximum number of fbarriers per work group
    status =
        hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &agent_i->fbarrier_max_size);
    RET_IF_HSA_ERR(status);

    // Maximum number of waves per CU
    status = hsa_agent_get_info(agent,
                                static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
                                &agent_i->max_waves_per_cu);
    RET_IF_HSA_ERR(status);

    return status;
}
|
||||
|
||||
// Fill *pool_i with the attributes of the memory pool `pool` obtained through
// hsa_amd_memory_pool_get_info(). The first failing query is reported and returned by
// RET_IF_HSA_ERR; HSA_STATUS_SUCCESS is returned when every query succeeds.
hsa_status_t
AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t* pool_i)
{
    // Global flags
    hsa_status_t status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
    RET_IF_HSA_ERR(status);

    // Segment
    status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment);
    RET_IF_HSA_ERR(status);

    // Size of the pool
    status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->pool_size);
    RET_IF_HSA_ERR(status);

    // Runtime allocation: allowed flag, granule and alignment
    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &pool_i->alloc_allowed);
    RET_IF_HSA_ERR(status);

    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &pool_i->alloc_granule);
    RET_IF_HSA_ERR(status);

    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &pool_i->pool_alloc_alignment);
    RET_IF_HSA_ERR(status);

    // Accessible-by-all flag
    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pool_i->pl_access);
    RET_IF_HSA_ERR(status);

    return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Memory-pool callback: appends a new entry to the `pools` list of the rocm_info object
// passed through `data` and fills it from `pool`. Returns the status of AcquirePoolInfo().
hsa_status_t
get_pool_info(hsa_amd_memory_pool_t pool, void* data)
{
    auto* rocm = static_cast<rocm_info*>(data);

    // the entry is appended before it is filled, so it remains in the list on failure
    auto&              entry  = rocm->pools.emplace_back();
    const hsa_status_t status = AcquirePoolInfo(pool, &entry);
    RET_IF_HSA_ERR(status);

    return status;
}
|
||||
|
||||
// Fill *isa_i with the attributes of `isa` obtained through hsa_isa_get_info_alt().
// isa_i->name_str is allocated here with new[] and ownership passes to the caller, who must
// release it with delete[]. The first failing query is reported and returned by
// RET_IF_HSA_ERR (name_str stays allocated in that case); otherwise the status of the last
// query is returned.
hsa_status_t
AcquireISAInfo(hsa_isa_t isa, isa_info_t* isa_i)
{
    hsa_status_t err;
    uint32_t     name_len = 0;
    err                   = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len);
    RET_IF_HSA_ERR(err);

    // One extra, zero-initialized byte guarantees that the name is NUL-terminated whether or
    // not the reported length already accounts for the terminator. A plain new-expression
    // reports failure by throwing and never yields nullptr, so no null check is needed.
    isa_i->name_str = new char[static_cast<size_t>(name_len) + 1]{};

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS, isa_i->mach_models);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(
        isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->def_rounding_modes);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(
        isa, HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->base_rounding_modes);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION, &isa_i->fast_f16);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM, &isa_i->workgroup_max_dim);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE, &isa_i->workgroup_max_size);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM, &isa_i->grid_max_dim);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE, &isa_i->grid_max_size);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE, &isa_i->fbarrier_max_size);
    RET_IF_HSA_ERR(err);

    return err;
}
|
||||
|
||||
// ISA callback: appends a new entry to the `isas` list of the rocm_info object passed
// through `data` and fills it from `isa`. Returns HSA_STATUS_SUCCESS, or the failing status
// of AcquireISAInfo().
hsa_status_t
get_isa_info(hsa_isa_t isa, void* data)
{
    auto*       info  = static_cast<rocm_info*>(data);
    isa_info_t& isa_i = info->isas.emplace_back();

    isa_i.name_str = nullptr;

    // Call AcquireISAInfo() exactly once and hand only the stored result to the macro: a
    // macro that expands its argument more than once would otherwise repeat the call on
    // failure and overwrite (leak) the name buffer allocated by the first call.
    const hsa_status_t err = AcquireISAInfo(isa, &isa_i);
    RET_IF_HSA_ERR(err);

    return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Agent callback: appends a new entry to the `agents` list of the rocm_info object passed
// through `data`, fills it from `agent`, then records the agent's memory pools and ISAs via
// get_pool_info / get_isa_info. Returns HSA_STATUS_SUCCESS or the first failing status.
hsa_status_t
AcquireAgentInfo(hsa_agent_t agent, void* data)
{
    auto*         info    = static_cast<rocm_info*>(data);
    agent_info_t& agent_i = info->agents.emplace_back();

    // Every call is made exactly once and only its stored result is handed to the macro: a
    // macro that expands its argument more than once would otherwise repeat a failed call,
    // and a repeated pool iteration would append duplicate pool entries.
    const hsa_status_t entry_status = AcquireAgentInfoEntry(agent, &agent_i);
    RET_IF_HSA_ERR(entry_status);

    const hsa_status_t pool_status = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, data);
    RET_IF_HSA_ERR(pool_status);

    // HSA_STATUS_ERROR_INVALID_AGENT from the ISA iteration is tolerated, not treated as failure
    const hsa_status_t isa_status = hsa_agent_iterate_isas(agent, get_isa_info, data);
    if(isa_status != HSA_STATUS_ERROR_INVALID_AGENT)
    {
        RET_IF_HSA_ERR(isa_status);
    }

    return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Print hints about why ROCm initialization may have failed: first whether `lsmod` lists
// the amdgpu kernel module, then whether a "video" group is present in the group database.
void
CheckInitError()
{
    printf("ROCm initialization failed\n");

    // Check kernel module for ROCk is loaded: any output from the pipeline counts as loaded.
    // A pipe that could not be opened is handled like "no output" instead of being passed to
    // fread(), and the pipe is always closed again.
    bool  module_listed = false;
    FILE* fd            = popen("lsmod | grep amdgpu", "r");
    if(fd != nullptr)
    {
        char buf[16];
        module_listed = (fread(buf, 1, sizeof(buf), fd) > 0);
        pclose(fd);
    }

    if(!module_listed)
    {
        printf("ROCk module is NOT loaded, possibly no GPU devices\n");
        return;
    }

    // Look for the group "video"
    // @note: User who are not members of "video"
    // group cannot access DRM services
    // NOTE(review): this loop only establishes that a group named "video" exists; it does
    // not look at the membership of the calling user -- confirm whether that is intended.
    bool video_group_found = false;
    setgrent();  // start from the first entry of the group database
    while(const struct group* grp = getgrent())
    {
        // strcmp stops at the terminator, so names shorter than "video" are never over-read
        if(strcmp(grp->gr_name, "video") == 0)
        {
            video_group_found = true;
            break;
        }
    }
    endgrent();  // close the group database again

    if(!video_group_found)
    {
        printf("User is not member of \"video\" group\n");
        return;
    }
}
|
||||
} // namespace
|
||||
|
||||
// Print out all static information known to HSA about the target system.
|
||||
// Throughout this program, the Acquire-type functions make HSA calls to
|
||||
// iterate through HSA objects and then perform HSA get_info calls to
|
||||
// accumulate information about those objects. Corresponding to each
|
||||
// Acquire-type function is a Display* function which display the
|
||||
// accumulated data in a formatted way.
|
||||
int
|
||||
get_info(rocm_info& info)
|
||||
{
|
||||
RET_IF_HSA_INIT_ERR(hsa_init());
|
||||
|
||||
// This function will call HSA get_info functions to gather information
|
||||
// about the system.
|
||||
RET_IF_HSA_ERR(AcquireSystemInfo(&info.system));
|
||||
|
||||
RET_IF_HSA_ERR(hsa_iterate_agents(AcquireAgentInfo, &info));
|
||||
|
||||
RET_IF_HSA_ERR(hsa_shut_down());
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
#undef RET_IF_HSA_ERR
|
||||
} // namespace test
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,149 @@
|
||||
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include "fmt/core.h"
|
||||
#include "fmt/ranges.h"
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace test
|
||||
{
|
||||
// This structure holds system information acquired through hsa info related
|
||||
// calls, and is later used for reference when displaying the information.
|
||||
struct system_info_t
|
||||
{
|
||||
uint16_t major = 0;
|
||||
uint16_t minor = 0;
|
||||
uint64_t timestamp_frequency = 0;
|
||||
uint64_t max_wait = 0;
|
||||
hsa_endianness_t endianness = {};
|
||||
hsa_machine_model_t machine_model = {};
|
||||
};
|
||||
|
||||
// This structure holds agent information acquired through hsa info related
|
||||
// calls, and is later used for reference when displaying the information.
|
||||
struct agent_info_t
|
||||
{
|
||||
char name[64] = {'\0'};
|
||||
char vendor_name[64] = {'\0'};
|
||||
char device_mkt_name[64] = {'\0'};
|
||||
hsa_agent_feature_t agent_feature = {};
|
||||
hsa_profile_t agent_profile = {};
|
||||
hsa_default_float_rounding_mode_t float_rounding_mode = {};
|
||||
uint32_t max_queue = 0;
|
||||
uint32_t queue_min_size = 0;
|
||||
uint32_t queue_max_size = 0;
|
||||
hsa_queue_type_t queue_type = {};
|
||||
uint32_t node = 0;
|
||||
hsa_device_type_t device_type = {};
|
||||
uint32_t cache_size[4] = {0, 0, 0, 0};
|
||||
uint32_t chip_id = 0;
|
||||
uint32_t cacheline_size = 0;
|
||||
uint32_t max_clock_freq = 0;
|
||||
uint32_t internal_node_id = 0;
|
||||
uint32_t max_addr_watch_pts = 0;
|
||||
uint32_t family_id = 0;
|
||||
uint32_t ucode_version = 0;
|
||||
uint32_t sdma_ucode_version = 0;
|
||||
// HSA_AMD_AGENT_INFO_MEMORY_WIDTH is deprecated, so exclude
|
||||
// uint32_t mem_max_freq; Not supported by get_info
|
||||
uint32_t compute_unit = 0;
|
||||
uint32_t wavefront_size = 0;
|
||||
uint32_t workgroup_max_size = 0;
|
||||
uint32_t grid_max_size = 0;
|
||||
uint32_t fbarrier_max_size = 0;
|
||||
uint32_t max_waves_per_cu = 0;
|
||||
uint32_t simds_per_cu = 0;
|
||||
uint32_t shader_engs = 0;
|
||||
uint32_t shader_arrs_per_sh_eng = 0;
|
||||
hsa_isa_t agent_isa = {};
|
||||
hsa_dim3_t grid_max_dim = {0, 0, 0};
|
||||
uint16_t workgroup_max_dim[3] = {0, 0, 0};
|
||||
uint16_t bdf_id = 0;
|
||||
bool fast_f16 = false;
|
||||
};
|
||||
|
||||
// Memory-pool values gathered through the HSA info queries and kept for later
// reference. Every member starts out zero / false.
struct pool_info_t
{
    uint32_t segment{0};
    size_t   pool_size{0};
    bool     alloc_allowed{false};
    size_t   alloc_granule{0};
    size_t   pool_alloc_alignment{0};
    bool     pl_access{false};
    uint32_t global_flag{0};
};
|
||||
|
||||
// This structure holds ISA information acquired through hsa info
|
||||
// related calls, and is later used for reference when displaying the
|
||||
// information.
|
||||
struct isa_info_t
|
||||
{
|
||||
char* name_str = nullptr;
|
||||
uint32_t workgroup_max_size = 0;
|
||||
hsa_dim3_t grid_max_dim = {0, 0, 0};
|
||||
uint64_t grid_max_size = 0;
|
||||
uint32_t fbarrier_max_size = 0;
|
||||
uint16_t workgroup_max_dim[3] = {0, 0, 0};
|
||||
bool def_rounding_modes[3] = {false, false, false};
|
||||
bool base_rounding_modes[3] = {false, false, false};
|
||||
bool mach_models[2] = {false, false};
|
||||
bool profiles[2] = {false, false};
|
||||
bool fast_f16 = false;
|
||||
};
|
||||
|
||||
// Cache values gathered through the HSA info queries and kept for later
// reference. Every member starts out zero / null.
struct cache_info_t
{
    char*    name_str{nullptr};
    uint8_t  level{0};
    uint32_t size{0};
};
|
||||
|
||||
struct rocm_info
|
||||
{
|
||||
system_info_t system = {};
|
||||
std::vector<agent_info_t> agents = {};
|
||||
std::vector<pool_info_t> pools = {};
|
||||
std::vector<isa_info_t> isas = {};
|
||||
};
|
||||
|
||||
int
|
||||
get_info(rocm_info& info);
|
||||
} // namespace test
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,41 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
// Checks that rocprofiler_get_timestamp() reports success and that the value
// it writes lies strictly between two common::timestamp_ns() readings taken
// immediately before and after the call.
TEST(rocprofiler_lib, timestamp)
{
    const auto              before  = rocprofiler::common::timestamp_ns();
    rocprofiler_timestamp_t queried = {};
    const auto              status  = rocprofiler_get_timestamp(&queried);
    const auto              after   = rocprofiler::common::timestamp_ns();

    EXPECT_EQ(status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_GT(before, 0);
    EXPECT_GT(queried, before);
    EXPECT_GT(after, queried);
}
|
||||
@@ -0,0 +1,53 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <rocprofiler/fwd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "rocprofiler/version.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
// Checks rocprofiler_get_version() against the ROCPROFILER_VERSION_* macros,
// first by querying one component per call (the other two arguments null),
// then by querying all three components in a single call.
TEST(rocprofiler_lib, version)
{
    using version_t = std::tuple<uint32_t, uint32_t, uint32_t>;

    const version_t expected(
        ROCPROFILER_VERSION_MAJOR, ROCPROFILER_VERSION_MINOR, ROCPROFILER_VERSION_PATCH);
    version_t piecewise(0, 0, 0);
    version_t combined(0, 0, 0);

    // One component per call.
    const auto major_status =
        rocprofiler_get_version(&std::get<0>(piecewise), nullptr, nullptr);
    const auto minor_status =
        rocprofiler_get_version(nullptr, &std::get<1>(piecewise), nullptr);
    const auto patch_status =
        rocprofiler_get_version(nullptr, nullptr, &std::get<2>(piecewise));

    EXPECT_EQ(major_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(minor_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(patch_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(piecewise, expected);

    // All three components in one call.
    const auto combined_status = rocprofiler_get_version(
        &std::get<0>(combined), &std::get<1>(combined), &std::get<2>(combined));
    EXPECT_EQ(combined_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(combined, expected);
    EXPECT_EQ(combined, piecewise);
}
|
||||
@@ -20,4 +20,4 @@ gtest_add_tests(
|
||||
TEST_LIST buffering-tests_TESTS
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
|
||||
set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 120 LABELS "unittests")
|
||||
|
||||
Reference in New Issue
Block a user