Agent information w/o using hsa-runtime (#100)

* Agent information w/o using hsa-runtime

- remove lib/rocprofiler/hsa/agent.{hpp,cpp}
- update include/rocprofiler/agent.h
  - basically all possible info from /sys/class/kfd/kfd/topology/nodes/*

* Print topology in rocprofiler_lib.agent test

- hack to help diagnose errors

* Update lib/rocprofiler/tests/details/agent.cpp

- use LOG_IF(WARNING, ...) instead of LOG_IF(FATAL, ...)

* Update lib/rocprofiler/tests/agent.cpp

- print rocminfo at beginning of test
- fix comparison of agent handle
- misc other checks

* Update lib/rocprofiler/agent.cpp

- handle unreadable /sys/class/kfd/kfd/topology/nodes/<N>/properties file

* Update lib/tests/buffering/CMakeLists.txt

- increase timeout to 120
  - buffering.parallel will timeout when thread sanitizing is enabled

* Update cmake: rocprofiler-drm

- find drm headers and libraries

* Update include/rocprofiler/agent.h

- add family_id field

* Update lib/rocprofiler/agent.cpp

- parse /proc/cpuinfo for name, family, apicid, etc.
- read_topology uses unique pointers to cleanup memory allocations
- implement name and gfxip

* Update lib/rocprofiler/tests/agent.cpp

- improved failure message
- check name/gfxip
- remove check against hsa_agent_t.handle
  - this value is dependent on the address of C++ class

* Update lib/rocprofiler/tests/details/agent.cpp

- tweak gfxip_ variable which is broken for CPU

* Update lib/rocprofiler/agent.cpp

- update string handling for name and gfxip

* Update lib/rocprofiler/tests/agent.cpp

- minor output tweak

* Update lib/rocprofiler/registration.{hpp,cpp}

- registration::init_logging() function

* Update lib/rocprofiler/agent.cpp

- fix hex handling of GFX step version

* Update lib/rocprofiler/tests/details/agent.cpp

- fix format string when nearest CPUs not found

* Update lib/rocprofiler/tests/CMakeLists.txt

- exclude details/agent.cpp from being parsed for gtest tests

* Update include/rocprofiler/fwd.h

- add ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI status

* Update lib/rocprofiler/tests/details/agent.{hpp,cpp}

- replace with slightly modified implementation of rocminfo
  - primary change was not printing

* Update lib/rocprofiler/tests/agent.cpp

- update test to use rocminfo data

* Update lib/rocprofiler/agent.cpp

- add pc_sampling_configs
- return error on incompatible ABI

* Update counters and counters tests

- rename test names for consistency
- fixed incorrect spelling of derived

* Add lib/rocprofiler/tests/{timestamp,version}.cpp

- add timestamp and version tests for rocprofiler_get_timestamp and rocprofiler_get_version, respectively

* Update lib/rocprofiler/tests/agent

- fix double free of name_str from isa_info_t

* Update include/rocprofiler/agent.h

- comments for rocprofiler_agent_mem_bank_t
- add rocprofiler_dim3_t
- comments for rocprofiler_agent_t
- add new fields to rocprofiler_agent_t
    - cu_count
    - workgroup_max_size
    - workgroup_max_dim
    - grid_max_size
    - grid_max_dim
    - vendor_name
    - product_name
- change prototype of rocprofiler_available_agents_cb_t to be const agent**

* Update lib/rocprofiler/agent.cpp

- set size field
- implement:
  - product_name
  - vendor_name
  - workgroup_max_size
  - workgroup_max_dim
  - grid_max_size
  - grid_max_dim
  - cu_count

* Update lib/rocprofiler/tests/agent.cpp

- changes for const agent*

* Update samples/pc_sampling

- updates for const agent*

* Update lib/rocprofiler/agent.cpp

- fix ABI compatibility check
  - return incompatible if tool agent is larger than our agent

* Update include/rocprofiler/agent.h

- doxygen comments
- make size field of rocprofiler_agent_t uint64_t for consistency
- add gpu_id via /sys/class/kfd/kfd/.../<idx>/gpu_id
- add model_name via /sys/class/kfd/kfd/.../<idx>/name

* Update lib/rocprofiler/agent.cpp

- add read_file function (vector of strings)
- support enum in read_property
- assign model_name and gpu_id fields
- remove unique_id

* Update lib/rocprofiler/tests/details/agent.*

- support family id, ucode_version, sdma_ucode_version

* Update lib/rocprofiler/tests/agent.cpp

- Add rocprofiler_lib.agent_abi test
- Verify family_id, ucode_version, sdma_ucode_version
This commit is contained in:
Jonathan R. Madsen
2023-10-10 18:10:23 -05:00
committed by GitHub
parent 2d533ad91e
commit a798a26227
24 changed files with 1718 additions and 772 deletions
+36
View File
@@ -168,3 +168,39 @@ find_library(
PATHS ${rocm_version_DIR} ${ROCM_PATH})
target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_library})
# ----------------------------------------------------------------------------------------#
#
# drm
#
# ----------------------------------------------------------------------------------------#
# Locate the directory containing drm.h. The ROCm locations and /opt/amdgpu are
# passed as both HINTS and PATHS, and "include/drm" / "include" are tried beneath
# each of them. REQUIRED turns a missing header into a configure-time error.
find_path(
drm_INCLUDE_DIR
NAMES drm.h
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATH_SUFFIXES include/drm include REQUIRED)
# Locate the directory containing xf86drm.h using the same search locations.
# It is resolved separately from drm.h, so the two results may differ.
find_path(
xf86drm_INCLUDE_DIR
NAMES xf86drm.h
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATH_SUFFIXES include/drm include REQUIRED)
# Locate the core drm library (library name "drm").
find_library(
drm_LIBRARY
NAMES drm
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED)
# Locate the amdgpu-specific drm library (library name "drm_amdgpu").
find_library(
drm_amdgpu_LIBRARY
NAMES drm_amdgpu
HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu
PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu REQUIRED)
# Attach both header directories (as SYSTEM includes) and both libraries to the
# rocprofiler-drm interface target so that anything linking it inherits them.
target_include_directories(rocprofiler-drm SYSTEM INTERFACE ${drm_INCLUDE_DIR}
${xf86drm_INCLUDE_DIR})
target_link_libraries(rocprofiler-drm INTERFACE ${drm_LIBRARY} ${drm_amdgpu_LIBRARY})
+1
View File
@@ -51,3 +51,4 @@ rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" IN
rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL)
rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL)
rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL)
rocprofiler_add_interface_library(rocprofiler-drm "drm (amdgpu) library" INTERNAL)
+1 -1
View File
@@ -37,7 +37,7 @@ const std::string_view MI200_NAME = "gfx90a";
}
static rocprofiler_status_t
find_first_gpu_agent_impl(rocprofiler_agent_t** agents, size_t num_agents, void* data)
find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, void* data)
{
// data is required
if(!data) return ROCPROFILER_STATUS_ERROR;
@@ -18,9 +18,9 @@ std::vector<rocprofiler_context_id_t> contexts;
std::vector<rocprofiler_buffer_id_t> buffer_ids;
rocprofiler_status_t
find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_t** agents,
size_t num_agents,
void* data)
find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agents,
size_t num_agents,
void* data)
{
// data is required
if(!data) return ROCPROFILER_STATUS_ERROR;
+145 -7
View File
@@ -25,6 +25,10 @@
#include <rocprofiler/defines.h>
#include <rocprofiler/fwd.h>
#include <hsakmt/hsakmttypes.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
/**
@@ -34,14 +38,147 @@ ROCPROFILER_EXTERN_C_INIT
* @{
*/
/**
 * @brief Cache information for an agent.
 *
 * One instance describes a single cache attached to an agent. The field layout is
 * part of the public ABI, so members must not be reordered.
 */
typedef struct rocprofiler_agent_cache_t
{
uint64_t processor_id_low; ///< Identifies the processor number
uint64_t size; ///< Size of the cache
uint32_t level; ///< Integer representing level: 1, 2, 3, 4, etc.
uint32_t cache_line_size; ///< Cache line size in bytes
uint32_t cache_lines_per_tag; ///< Cache lines per Cache Tag
uint32_t association; ///< Cache Associativity
uint32_t latency; ///< Cache latency in ns
HsaCacheType type; ///< Cache type as described by HsaCacheType (hsakmt/hsakmttypes.h)
} rocprofiler_agent_cache_t;
/**
 * @brief IO link information for an agent.
 *
 * One instance describes a single link from one node (@c node_from) to another
 * (@c node_to). The field layout is part of the public ABI, so members must not be
 * reordered.
 */
typedef struct rocprofiler_agent_io_link_t
{
HSA_IOLINKTYPE type; ///< Discoverable IoLink Properties (optional)
uint32_t version_major; ///< Bus interface version (optional)
uint32_t version_minor; ///< Bus interface version (optional)
uint32_t node_from; ///< Source node of the link. See @ref rocprofiler_agent_id_t
uint32_t node_to; ///< Destination node of the link. See @ref rocprofiler_agent_id_t
uint32_t weight; ///< weight factor (derived from CDIT)
uint32_t min_latency; ///< minimum cost of time to transfer (rounded to ns)
uint32_t max_latency; ///< maximum cost of time to transfer (rounded to ns)
uint32_t min_bandwidth; ///< minimum interface Bandwidth in MB/s
uint32_t max_bandwidth; ///< maximum interface Bandwidth in MB/s
uint32_t recommended_transfer_size; ///< recommended transfer size to reach maximum bandwidth
///< in bytes
HSA_LINKPROPERTY flags; ///< override flags (may be active for specific platforms)
} rocprofiler_agent_io_link_t;
/**
 * @brief Memory bank information for an agent.
 *
 * One instance describes a single memory bank attached to an agent. The field layout
 * is part of the public ABI, so members must not be reordered.
 */
typedef struct rocprofiler_agent_mem_bank_t
{
HSA_HEAPTYPE heap_type; ///< Heap type of this bank as described by HSA_HEAPTYPE
HSA_MEMORYPROPERTY flags; ///< Memory property flags as described by HSA_MEMORYPROPERTY
uint32_t width; ///< the number of parallel bits of the memory interface
uint32_t mem_clk_max; ///< clock for the memory, this allows computing the available
///< bandwidth to the memory when needed
uint64_t size_in_bytes; ///< physical memory size of the memory range in bytes
} rocprofiler_agent_mem_bank_t;
/**
 * @brief Multi-dimensional struct of data
 *
 * Holds one unsigned 32-bit value for each of three dimensions.
 */
typedef struct rocprofiler_dim3_t
{
uint32_t x; ///< Value for the first (x) dimension
uint32_t y; ///< Value for the second (y) dimension
uint32_t z; ///< Value for the third (z) dimension
} rocprofiler_dim3_t;
/**
 * @brief Agent.
 *
 * Describes one node (CPU or GPU) of the system. The @c size field is set to
 * sizeof(rocprofiler_agent_t) by rocprofiler so that tools can detect layout
 * differences between the version they were compiled against and the version in use.
 */
// NOTE(review): this span comes from a rendered diff. The unnamed `typedef struct`
// line and the three undocumented id/type/name members directly below the opening
// brace appear to be the removed (pre-change) lines -- confirm against the header.
typedef struct
typedef struct rocprofiler_agent_t
{
rocprofiler_agent_id_t id;
rocprofiler_agent_type_t type;
const char* name;
uint64_t size; ///< set to sizeof(rocprofiler_agent_t) by rocprofiler. This can be used for
///< versioning and compatibility handling
rocprofiler_agent_id_t id; ///< Internal opaque identifier
rocprofiler_agent_type_t type; ///< Enumeration for identifying the agent type (CPU, GPU, etc.)
uint32_t cpu_cores_count; ///< # of latency (= CPU) cores present on this HSA node. This value
///< is 0 for a HSA node with no such cores, e.g. a "discrete HSA GPU"
uint32_t simd_count; ///< # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a
///< node. This value is 0 if no FCompute cores are present (e.g. pure
///< "CPU node").
uint32_t mem_banks_count; ///< # of discoverable memory bank affinity properties on this
///< "H-NUMA" node.
uint32_t caches_count; ///< # of discoverable cache affinity properties on this "H-NUMA" node.
uint32_t io_links_count; ///< # of discoverable IO link affinity properties of this node
///< connecting to other nodes.
uint32_t cpu_core_id_base; ///< low value of the logical processor ID of the latency (= CPU)
///< cores available on this node
uint32_t simd_id_base; ///< low value of the logical processor ID of the throughput (= GPU)
///< units available on this node
uint32_t max_waves_per_simd; ///< This identifies the max. number of launched waves per SIMD.
///< If NumFComputeCores is 0, this value is ignored.
uint32_t lds_size_in_kb; ///< Size of Local Data Store in Kilobytes per SIMD Wavefront
uint32_t gds_size_in_kb; ///< Size of Global Data Store in Kilobytes shared across SIMD
///< Wavefronts
uint32_t num_gws; ///< Number of GWS barriers
uint32_t wave_front_size; ///< Number of SIMD cores per wavefront executed, typically 64, may
///< be 32 or a different value for some HSA based architectures
uint32_t num_xcc; ///< Number of XCC
uint32_t cu_count; ///< Number of compute units
uint32_t array_count; ///< Number of SIMD arrays
uint32_t num_shader_banks; ///< Number of Shader Banks or Shader Engines, typical values are 1
///< or 2
uint32_t simd_arrays_per_engine; ///< Number of SIMD arrays per engine
uint32_t cu_per_simd_array; ///< Number of Compute Units (CU) per SIMD array
uint32_t simd_per_cu; ///< Number of SIMD representing a Compute Unit (CU)
uint32_t max_slots_scratch_cu; ///< Number of temp. memory ("scratch") wave slots available to
///< access, may be 0 if HW has no restrictions
uint32_t gfx_target_version; ///< major_version=((value / 10000) % 100)
///< minor_version=((value / 100) % 100)
///< patch_version=(value % 100)
uint16_t vendor_id; ///< GPU vendor id; 0 on latency (= CPU)-only nodes
uint16_t device_id; ///< GPU device id; 0 on latency (= CPU)-only nodes
uint32_t location_id; ///< GPU BDF (Bus/Device/function number) - identifies the device
///< location in the overall system
uint32_t domain; ///< PCI domain of the GPU
uint32_t drm_render_minor; ///< DRM render device minor device number
uint32_t num_sdma_engines; ///< number of PCIe optimized SDMA engines
uint32_t num_sdma_xgmi_engines; ///< number of XGMI optimized SDMA engines
uint32_t num_sdma_queues_per_engine; ///< number of SDMA queue per one engine
uint32_t num_cp_queues; ///< number of Compute queues
uint32_t max_engine_clk_ccompute; ///< maximum engine clocks for CPU, including any boost
///< capabilities
uint32_t max_engine_clk_fcompute; ///< GPU only. Maximum engine clocks for GPU, including any
///< boost capabilities
HSA_ENGINE_VERSION sdma_fw_version; ///< GPU only
HSA_ENGINE_ID
fw_version; ///< GPU only. Identifier (rev) of the GPU uEngine or Firmware, may be 0
HSA_CAPABILITY capability; ///< GPU only
uint32_t cu_per_engine; ///< computed
uint32_t max_waves_per_cu; ///< computed
uint32_t family_id; ///< Family code
uint32_t workgroup_max_size; ///< GPU only. Maximum total number of work-items in a work-group.
uint32_t grid_max_size; ///< GPU only. Maximum total number of work-items in a grid.
uint64_t local_mem_size; ///< GPU only. Local memory size
uint64_t hive_id; ///< XGMI Hive the GPU node belongs to in the system. It is an opaque and
///< static number hash created by the PSP
uint64_t gpu_id; ///< GPU only. KFD identifier
rocprofiler_dim3_t workgroup_max_dim; ///< GPU only. Maximum number of work-items of each
///< dimension of a work-group.
rocprofiler_dim3_t grid_max_dim; ///< GPU only. Maximum number of work-items of each dimension
///< of a grid.
rocprofiler_agent_mem_bank_t* mem_banks; ///< Array of @c mem_banks_count memory bank entries
rocprofiler_agent_cache_t* caches; ///< Array of @c caches_count cache entries
rocprofiler_agent_io_link_t* io_links; ///< Array of @c io_links_count IO link entries
const char* name; ///< Name of the agent. Will be identical to product name for CPU
const char* vendor_name; ///< Vendor of agent (will be AMD)
const char* product_name; ///< Marketing name
const char* model_name; ///< GPU only. Will be something like vega20, mi200, etc.
rocprofiler_pc_sampling_config_array_t pc_sampling_configs; ///< PC sampling configurations
///< available on this agent
} rocprofiler_agent_t;
@@ -53,9 +190,10 @@ typedef struct
* @param [in] user_data Data pointer passback
* @return ::rocprofiler_status_t
*/
typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(rocprofiler_agent_t** agents,
size_t num_agents,
void* user_data);
typedef rocprofiler_status_t (*rocprofiler_available_agents_cb_t)(
const rocprofiler_agent_t** agents,
size_t num_agents,
void* user_data);
/**
* @brief Receive synchronous callback with an array of available agents at moment of invocation
+2
View File
@@ -67,6 +67,8 @@ typedef enum // NOLINT(performance-enum-size)
///< rocprofiler configuration (i.e.
///< function called post-initialization)
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, ///< Function is not implemented
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, ///< Data structure provided by user is incompatible
///< with current version of rocprofiler
ROCPROFILER_STATUS_LAST,
} rocprofiler_status_t;
+2 -1
View File
@@ -30,6 +30,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler::rocprofiler-amd-comgr>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-runtime>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-ptl>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>)
$<BUILD_INTERFACE:rocprofiler::rocprofiler-hsa-aql>
$<BUILD_INTERFACE:rocprofiler::rocprofiler-drm>)
set_target_properties(rocprofiler-common-library PROPERTIES OUTPUT_NAME
rocprofiler-common)
+549 -34
View File
@@ -24,55 +24,570 @@
#include <rocprofiler/fwd.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/rocprofiler/hsa/agent.hpp"
#include <fmt/core.h>
#include <glog/logging.h>
#include <libdrm/amdgpu.h>
#include <xf86drm.h>
#include <filesystem>
#include <fstream>
#include <limits>
#include <regex>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>
namespace rocprofiler
{
namespace agent
{
namespace
{
namespace fs = ::std::filesystem;
/// Values extracted from one processor entry of /proc/cpuinfo. Numeric fields
/// default to -1 and strings default to empty, meaning "not seen".
struct cpu_info
{
    long        processor   = -1;
    long        family      = -1;
    long        model       = -1;
    long        physical_id = -1;
    long        core_id     = -1;
    long        apicid      = -1;
    std::string vendor_id   = {};
    std::string model_name  = {};

    /// @return true only when every numeric field is non-negative and both
    /// strings are non-empty, i.e. every field was populated.
    bool is_valid() const
    {
        const bool numbers_set = processor >= 0 && family >= 0 && model >= 0 &&
                                 physical_id >= 0 && core_id >= 0 && apicid >= 0;
        const bool strings_set = !vendor_id.empty() && !model_name.empty();
        return numbers_set && strings_set;
    }
};
/**
 * @brief Parse /proc/cpuinfo into one cpu_info record per processor entry.
 *
 * The file is split into blocks separated by empty lines. Within a block, every
 * line of the form "<key> ...: <value>" is matched against the known keys and the
 * value is stored in the corresponding cpu_info field. Blocks which do not
 * populate every field are reported via LOG(ERROR) and dropped.
 *
 * @return Vector of fully populated cpu_info records; empty if /proc/cpuinfo
 *         cannot be opened.
 */
auto
parse_cpu_info()
{
    auto ifs = std::ifstream{"/proc/cpuinfo"};
    if(!ifs) return std::vector<cpu_info>{};

    // Split the file into blocks of consecutive non-empty lines.
    auto read_blocks = [&ifs]() {
        auto blocks        = std::vector<std::vector<std::string>>{};
        auto current_block = std::vector<std::string>{};
        auto line          = std::string{};
        // std::getline reports success for a final line without a trailing newline
        // (eofbit is set but failbit is not), so every line it returns is consumed.
        while(std::getline(ifs, line))
        {
            if(line.empty())
            {
                if(!current_block.empty()) blocks.emplace_back(std::move(current_block));
                current_block.clear();
            }
            else
            {
                current_block.emplace_back(line);
            }
        }
        // Flush the last block when the file does not end with an empty line.
        if(!current_block.empty()) blocks.emplace_back(std::move(current_block));
        return blocks;
    };

    // True when text begins with prefix; rfind(.., 0) only inspects position 0.
    auto starts_with = [](const std::string& text, const char* prefix) {
        return text.rfind(prefix, 0) == 0;
    };

    // Store the leading integer of text in field. A value which is not a number
    // leaves field untouched (-1), so the entry is reported as invalid below
    // instead of raising an exception.
    auto assign_long = [](const std::string& text, long& field) {
        auto iss    = std::istringstream{text};
        long parsed = 0;
        if(iss >> parsed) field = parsed;
    };

    // Compiled once: captures everything after the last ": " on the line.
    const auto key_value_re = std::regex{".*: (.*)$"};

    auto processor_blocks = read_blocks();
    auto processor_info   = std::vector<cpu_info>{};
    processor_info.reserve(processor_blocks.size());

    for(const auto& bitr : processor_blocks)
    {
        auto info_v = cpu_info{};
        for(const auto& itr : bitr)
        {
            auto match = std::smatch{};
            if(!std::regex_match(itr, match, key_value_re)) continue;

            const auto value = match[1].str();
            if(starts_with(itr, "vendor_id"))
                info_v.vendor_id = value;
            else if(starts_with(itr, "model name"))
                info_v.model_name = value;
            else if(starts_with(itr, "processor"))
                assign_long(value, info_v.processor);
            else if(starts_with(itr, "cpu family"))
                assign_long(value, info_v.family);
            else if(starts_with(itr, "model"))  // "model name" was handled above
                assign_long(value, info_v.model);
            else if(starts_with(itr, "physical id"))
                assign_long(value, info_v.physical_id);
            else if(starts_with(itr, "core id"))
                assign_long(value, info_v.core_id);
            else if(starts_with(itr, "apicid"))
                assign_long(value, info_v.apicid);
        }

        if(info_v.is_valid())
            processor_info.emplace_back(info_v);
        else
        {
            LOG(ERROR) << "Invalid processor info: "
                       << fmt::format("processor={}, vendor={}, family={}, model={}, name={}, "
                                      "physical id={}, core id={}, apicid={}",
                                      info_v.processor,
                                      info_v.vendor_id,
                                      info_v.family,
                                      info_v.model,
                                      info_v.model_name,
                                      info_v.physical_id,
                                      info_v.core_id,
                                      info_v.apicid);
        }
    }

    return processor_info;
}
auto&
get_cpu_info()
{
static auto _v = parse_cpu_info();
return _v;
}
/**
 * @brief Read a file as a sequence of whitespace-separated tokens.
 *
 * @param fname Path of the file to read.
 * @return The tokens in file order; empty when the file holds no tokens.
 * @throws std::runtime_error if the file cannot be opened.
 */
auto
read_file(const std::string& fname)
{
    auto ifs = std::ifstream{fname};
    if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};

    auto data = std::vector<std::string>{};
    // Loop on the result of the extraction rather than on eof(): a final token
    // without trailing whitespace sets eofbit yet is still a valid token, and a
    // read error sets failbit/badbit without eofbit, which must end the loop too.
    for(auto value = std::string{}; ifs >> value;)
        data.emplace_back(value);

    return data;
}
/**
 * @brief Read a file made of whitespace-separated "<label> <value>" pairs.
 *
 * @param fname Path of the file to read.
 * @return Map from label to value, both kept as strings.
 * @throws std::runtime_error if the file cannot be opened, if a label has no
 *         value following it, or if a label occurs more than once.
 */
auto
read_map(const std::string& fname)
{
    auto ifs = std::ifstream{fname};
    if(!ifs) throw std::runtime_error{fmt::format("file '{}' cannot be read", fname)};

    auto data = std::unordered_map<std::string, std::string>{};
    // Test the extraction itself instead of eof(): the last value of a file with
    // no trailing newline sets eofbit but is valid, and a read error (which does
    // not set eofbit) must terminate the loop.
    for(auto label = std::string{}; ifs >> label;)
    {
        auto entry = std::string{};
        if(!(ifs >> entry))
            throw std::runtime_error{
                fmt::format("unexpected file format in '{}' at {}", fname, label)};

        if(!data.emplace(label, entry).second)
            throw std::runtime_error{fmt::format("duplicate entry in '{}': {}", fname, label)};
    }

    return data;
}
/**
 * @brief Look up label in data and convert its string value to an integer.
 *
 * Enum destinations are handled by reading into the enum's underlying integer
 * type and casting back. For integral destinations the text is parsed into
 * intmax_t / uintmax_t first so that values outside the range of Tp are detected.
 *
 * @param data  Map from label to textual value.
 * @param label Key to look up. A missing key is logged and value is left untouched.
 * @param value Destination for the parsed value.
 * @throws std::runtime_error if the parsed value does not fit in Tp.
 */
template <typename MapT, typename Tp>
void
read_property(const MapT& data, const std::string& label, Tp& value)
{
    if constexpr(!std::is_enum<Tp>::value)
    {
        static_assert(std::is_integral<Tp>::value, "Expected integral type");
        using wide_type = std::conditional_t<std::is_signed<Tp>::value, intmax_t, uintmax_t>;

        const auto entry = data.find(label);
        if(entry == data.end())
        {
            LOG(ERROR) << "agent properties map missing " << label << " entry";
            return;
        }

        auto parsed = wide_type{};
        auto iss    = std::istringstream{entry->second};
        iss >> parsed;

        // verify that the destination type is wide enough for the value
        constexpr auto lower_bound = std::numeric_limits<Tp>::min();
        constexpr auto upper_bound = std::numeric_limits<Tp>::max();

        if(parsed < lower_bound)
            throw std::runtime_error{fmt::format(
                "data with label {} has a value (={}) which is less than the min value for the "
                "type (={})",
                label,
                parsed,
                lower_bound)};

        if(parsed > upper_bound)
            throw std::runtime_error{fmt::format(
                "data with label {} has a value (={}) which is greater than the max value for "
                "the type (={})",
                label,
                parsed,
                upper_bound)};

        value = static_cast<Tp>(parsed);
    }
    else
    {
        using underlying_type = std::underlying_type_t<Tp>;
        // never expect this to fail but it does guard against infinite recursion
        static_assert(!std::is_enum<underlying_type>::value, "Expected non-enum type");

        auto as_integer = static_cast<underlying_type>(value);
        read_property(data, label, as_integer);
        value = static_cast<Tp>(as_integer);
    }
}
/// Pack a (major, minor, patch) triple into a single number using two decimal
/// digits for minor and patch: major * 10000 + minor * 100 + patch.
constexpr auto
compute_version(uint32_t major_v, uint32_t minor_v, uint32_t patch_v)
{
    constexpr uint32_t digits_per_field = 100;
    return ((major_v * digits_per_field) + minor_v) * digits_per_field + patch_v;
}
auto
read_topology()
{
using unique_agent_t = std::unique_ptr<rocprofiler_agent_t, void (*)(rocprofiler_agent_t*)>;
auto sysfs_nodes_path = fs::path{"/sys/class/kfd/kfd/topology/nodes/"};
if(!fs::exists(sysfs_nodes_path))
throw std::runtime_error{
fmt::format("sysfs nodes path '{}' does not exist", sysfs_nodes_path.string())};
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
auto mi200_pc_sampling_config = pc_sampling_config_vec_t{
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
1UL,
1000000000UL,
0}};
const auto& cpu_info_v = get_cpu_info();
auto data = std::vector<unique_agent_t>{};
uint64_t n = 0;
while(true)
{
auto idx = n++;
auto node_path = sysfs_nodes_path / std::to_string(idx);
if(!fs::exists(node_path)) break;
auto properties = std::unordered_map<std::string, std::string>{};
auto name_prop = std::vector<std::string>{};
auto gpu_id_prop = std::vector<std::string>{};
try
{
properties = read_map(node_path / "properties");
name_prop = read_file(node_path / "name");
gpu_id_prop = read_file(node_path / "gpu_id");
} catch(std::runtime_error& e)
{
LOG(ERROR) << "Error reading '" << (node_path / "properties").string()
<< "' :: " << e.what();
continue;
}
auto agent_info = rocprofiler_agent_t{};
memset(&agent_info, 0, sizeof(agent_info));
agent_info.size = sizeof(rocprofiler_agent_t);
agent_info.id.handle = idx;
agent_info.type = ROCPROFILER_AGENT_TYPE_NONE;
if(!name_prop.empty())
agent_info.model_name = strdup(name_prop.front().c_str());
else
agent_info.model_name = "";
if(!gpu_id_prop.empty()) agent_info.gpu_id = std::stoull(gpu_id_prop.front());
read_property(properties, "cpu_cores_count", agent_info.cpu_cores_count);
read_property(properties, "simd_count", agent_info.simd_count);
if(agent_info.cpu_cores_count > 0)
agent_info.type = ROCPROFILER_AGENT_TYPE_CPU;
else if(agent_info.simd_count > 0)
agent_info.type = ROCPROFILER_AGENT_TYPE_GPU;
read_property(properties, "mem_banks_count", agent_info.mem_banks_count);
read_property(properties, "caches_count", agent_info.caches_count);
read_property(properties, "io_links_count", agent_info.io_links_count);
read_property(properties, "cpu_core_id_base", agent_info.cpu_core_id_base);
read_property(properties, "simd_id_base", agent_info.simd_id_base);
read_property(properties, "max_waves_per_simd", agent_info.max_waves_per_simd);
read_property(properties, "lds_size_in_kb", agent_info.lds_size_in_kb);
read_property(properties, "gds_size_in_kb", agent_info.gds_size_in_kb);
read_property(properties, "num_gws", agent_info.num_gws);
read_property(properties, "wave_front_size", agent_info.wave_front_size);
read_property(properties, "array_count", agent_info.array_count);
read_property(properties, "simd_arrays_per_engine", agent_info.simd_arrays_per_engine);
read_property(properties, "cu_per_simd_array", agent_info.cu_per_simd_array);
read_property(properties, "simd_per_cu", agent_info.simd_per_cu);
read_property(properties, "max_slots_scratch_cu", agent_info.max_slots_scratch_cu);
read_property(properties, "gfx_target_version", agent_info.gfx_target_version);
read_property(properties, "vendor_id", agent_info.vendor_id);
read_property(properties, "device_id", agent_info.device_id);
read_property(properties, "location_id", agent_info.location_id);
read_property(properties, "domain", agent_info.domain);
read_property(properties, "drm_render_minor", agent_info.drm_render_minor);
read_property(properties, "hive_id", agent_info.hive_id);
read_property(properties, "num_sdma_engines", agent_info.num_sdma_engines);
read_property(properties, "num_sdma_xgmi_engines", agent_info.num_sdma_xgmi_engines);
read_property(
properties, "num_sdma_queues_per_engine", agent_info.num_sdma_queues_per_engine);
read_property(properties, "num_cp_queues", agent_info.num_cp_queues);
read_property(properties, "max_engine_clk_ccompute", agent_info.max_engine_clk_ccompute);
agent_info.name = "";
agent_info.product_name = "";
agent_info.vendor_name = "";
if(agent_info.type == ROCPROFILER_AGENT_TYPE_GPU)
{
constexpr auto workgrp_max = 1024;
constexpr auto grid_max = std::numeric_limits<uint32_t>::max();
read_property(
properties, "max_engine_clk_fcompute", agent_info.max_engine_clk_fcompute);
read_property(properties, "local_mem_size", agent_info.local_mem_size);
read_property(properties, "fw_version", agent_info.fw_version.Value);
read_property(properties, "capability", agent_info.capability.Value);
read_property(properties, "sdma_fw_version", agent_info.sdma_fw_version.Value);
agent_info.fw_version.Value &= 0x3ff;
agent_info.sdma_fw_version.Value &= 0x3ff;
agent_info.workgroup_max_size = workgrp_max; // hardcoded in hsa-runtime
agent_info.workgroup_max_dim = {workgrp_max, workgrp_max, workgrp_max};
agent_info.grid_max_size = grid_max; // hardcoded in hsa-runtime
agent_info.grid_max_dim = {grid_max, grid_max, grid_max};
agent_info.cu_count = agent_info.simd_count / agent_info.simd_per_cu;
if(int drm_fd = 0; (drm_fd = drmOpenRender(agent_info.drm_render_minor)) >= 0)
{
uint32_t major_version = 0;
uint32_t minor_version = 0;
auto* device_handle = amdgpu_device_handle{};
if(amdgpu_device_initialize(
drm_fd, &major_version, &minor_version, &device_handle) == 0)
{
auto major = (agent_info.gfx_target_version / 10000) % 100;
auto minor = (agent_info.gfx_target_version / 100) % 100;
auto step = (agent_info.gfx_target_version % 100);
agent_info.name =
strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str());
agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle));
agent_info.vendor_name = strdup("AMD");
amdgpu_gpu_info gpu_info = {};
if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0)
{
agent_info.family_id = gpu_info.family_id;
}
amdgpu_device_deinitialize(device_handle);
}
drmClose(drm_fd);
}
constexpr auto gfx90a_version = compute_version(9, 0, 10);
if(agent_info.gfx_target_version >= gfx90a_version)
{
agent_info.pc_sampling_configs = rocprofiler_pc_sampling_config_array_t{
mi200_pc_sampling_config.data(), mi200_pc_sampling_config.size()};
}
}
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
{
agent_info.cu_count = agent_info.cpu_cores_count;
agent_info.vendor_name = strdup("CPU");
for(const auto& itr : cpu_info_v)
{
if(agent_info.cpu_core_id_base == itr.apicid)
{
agent_info.name = strdup(itr.model_name.c_str());
agent_info.product_name = strdup(agent_info.name);
agent_info.family_id = itr.family;
break;
}
}
}
if(properties.count("num_xcc") > 0)
read_property(properties, "num_xcc", agent_info.num_xcc);
else
agent_info.num_xcc = 1;
agent_info.max_waves_per_cu = agent_info.simd_per_cu * agent_info.max_waves_per_simd;
if(agent_info.simd_arrays_per_engine > 0)
{
agent_info.num_shader_banks =
agent_info.array_count / agent_info.simd_arrays_per_engine;
// depends on above
if(agent_info.num_shader_banks * agent_info.simd_arrays_per_engine > 0)
{
agent_info.cu_per_engine =
(agent_info.simd_count / agent_info.simd_per_cu) /
(agent_info.num_shader_banks * agent_info.simd_arrays_per_engine);
}
}
agent_info.mem_banks = nullptr;
agent_info.caches = nullptr;
agent_info.io_links = nullptr;
if(agent_info.mem_banks_count > 0)
{
agent_info.mem_banks = new rocprofiler_agent_mem_bank_t[agent_info.mem_banks_count];
for(uint32_t i = 0; i < agent_info.mem_banks_count; ++i)
{
using heap_type_t = HSA_HEAPTYPE;
using underlying_heap_type_t = std::underlying_type_t<heap_type_t>;
auto subproperties =
read_map(node_path / "mem_banks" / std::to_string(i) / "properties");
auto _heap_type = underlying_heap_type_t{};
read_property(subproperties, "heap_type", _heap_type);
agent_info.mem_banks[i].heap_type = static_cast<heap_type_t>(_heap_type);
read_property(
subproperties, "size_in_bytes", agent_info.mem_banks[i].size_in_bytes);
read_property(subproperties, "flags", agent_info.mem_banks[i].flags.MemoryProperty);
read_property(subproperties, "width", agent_info.mem_banks[i].width);
read_property(subproperties, "mem_clk_max", agent_info.mem_banks[i].mem_clk_max);
}
}
if(agent_info.caches_count > 0)
{
agent_info.caches = new rocprofiler_agent_cache_t[agent_info.caches_count];
for(uint32_t i = 0; i < agent_info.caches_count; ++i)
{
auto subproperties =
read_map(node_path / "caches" / std::to_string(i) / "properties");
read_property(
subproperties, "processor_id_low", agent_info.caches[i].processor_id_low);
read_property(subproperties, "level", agent_info.caches[i].level);
read_property(subproperties, "size", agent_info.caches[i].size);
read_property(
subproperties, "cache_line_size", agent_info.caches[i].cache_line_size);
read_property(
subproperties, "cache_lines_per_tag", agent_info.caches[i].cache_lines_per_tag);
read_property(subproperties, "association", agent_info.caches[i].association);
read_property(subproperties, "latency", agent_info.caches[i].latency);
read_property(subproperties, "type", agent_info.caches[i].type.Value);
}
}
if(agent_info.io_links_count > 0)
{
agent_info.io_links = new rocprofiler_agent_io_link_t[agent_info.io_links_count];
for(uint32_t i = 0; i < agent_info.io_links_count; ++i)
{
auto subproperties =
read_map(node_path / "io_links" / std::to_string(i) / "properties");
read_property(subproperties, "type", agent_info.io_links[i].type);
read_property(subproperties, "version_major", agent_info.io_links[i].version_major);
read_property(subproperties, "version_minor", agent_info.io_links[i].version_minor);
read_property(subproperties, "node_from", agent_info.io_links[i].node_from);
read_property(subproperties, "node_to", agent_info.io_links[i].node_to);
read_property(subproperties, "weight", agent_info.io_links[i].weight);
read_property(subproperties, "min_latency", agent_info.io_links[i].min_latency);
read_property(subproperties, "max_latency", agent_info.io_links[i].max_latency);
read_property(subproperties, "min_bandwidth", agent_info.io_links[i].min_bandwidth);
read_property(subproperties, "max_bandwidth", agent_info.io_links[i].max_bandwidth);
read_property(subproperties,
"recommended_transfer_size",
agent_info.io_links[i].recommended_transfer_size);
read_property(subproperties, "flags", agent_info.io_links[i].flags.LinkProperty);
}
}
data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) {
if(ptr)
{
auto free_cstring = [](const char*& val) {
if(val && ::strnlen(val, 1) > 0) ::free(const_cast<char*>(val));
val = "";
};
delete[] ptr->mem_banks;
delete[] ptr->caches;
delete[] ptr->io_links;
free_cstring(ptr->name);
free_cstring(ptr->vendor_name);
free_cstring(ptr->product_name);
free_cstring(ptr->model_name);
}
delete ptr;
});
}
return data;
}
auto&
get_agent_topology()
{
static auto _v = read_topology();
return _v;
}
} // namespace
} // namespace agent
} // namespace rocprofiler
extern "C" {
// Hands the caller an array of pointers to the available agents via `callback`
// and returns whatever status the callback returns. `agent_size` is the caller's
// sizeof(rocprofiler_agent_t); a value larger than this library's struct is
// rejected with ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI before the callback runs.
// `user_data` is passed through to the callback untouched.
//
// NOTE(review): this span comes from a rendered diff and interleaves removed and
// added lines (two declarations each of `agents` and `pointers`, two emplace_back
// calls). The get_agents lambda built on rocprofiler::hsa::all_agents() and the
// trailing assert look like the removed implementation -- confirm against the file.
rocprofiler_status_t
rocprofiler_query_available_agents(rocprofiler_available_agents_cb_t callback,
size_t agent_size,
void* user_data)
{
using pc_sampling_config_vec_t = std::vector<rocprofiler_pc_sampling_configuration_t>;
// a caller compiled against a larger struct expects fields this library cannot provide
if(agent_size > sizeof(rocprofiler_agent_t))
{
LOG(ERROR) << "rocprofiler_agent_t used by caller is ABI-incompatible with "
"rocprofiler_agent_t in rocprofiler";
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI;
}
auto pc_sampling_configs = std::vector<pc_sampling_config_vec_t>{};
auto get_agents = [&pc_sampling_configs]() {
static const auto _default_pc_config =
rocprofiler_pc_sampling_configuration_t{ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
1UL,
1000000000UL,
0};
auto temporaries_ = std::vector<rocprofiler_agent_t>{};
const auto& agent_info = rocprofiler::hsa::all_agents();
for(const auto& agent : agent_info)
{
auto& _data = pc_sampling_configs.emplace_back();
if(agent.isGpu()) _data = {_default_pc_config};
temporaries_.emplace_back(rocprofiler_agent_t{
.id = rocprofiler_agent_id_t{.handle = temporaries_.size()},
.type = (agent.isCpu() ? ROCPROFILER_AGENT_TYPE_CPU
: (agent.isGpu() ? ROCPROFILER_AGENT_TYPE_GPU
: ROCPROFILER_AGENT_TYPE_NONE)),
.name = agent.getNameChar(),
.pc_sampling_configs =
rocprofiler_pc_sampling_config_array_t{_data.data(), _data.size()}});
}
return temporaries_;
};
auto agents = get_agents();
auto pointers = std::vector<rocprofiler_agent_t*>{};
// auto agents = get_agents();
// agent list built from the sysfs topology; each element owns one agent
auto& agents = rocprofiler::agent::get_agent_topology();
auto pointers = std::vector<const rocprofiler_agent_t*>{};
pointers.reserve(agents.size());
for(auto& agent : agents)
{
pointers.emplace_back(&agent);
pointers.emplace_back(agent.get());
}
assert(agent_size <= sizeof(rocprofiler_agent_t) &&
"rocprofiler_agent_t used by caller is ABI-incompatible with rocprofiler_agent_t in "
"rocprofiler");
return callback(pointers.data(), pointers.size(), user_data);
}
}
+1 -1
View File
@@ -104,7 +104,7 @@ findViaEnvironment(const std::string& filename)
} // namespace
// Loads the derived-counter definitions from "derived_counters.xml"; the file is located
// with findViaEnvironment() and parsed with loadXml().
MetricMap
getDerivedHardwareMetrics()
{
    return loadXml(findViaEnvironment("derived_counters.xml"));
}
+2 -2
View File
@@ -48,7 +48,7 @@ MetricMap
getBaseHardwareMetrics();
MetricMap
getDerrivedHardwareMetrics();
getDerivedHardwareMetrics();
} // namespace counters
@@ -98,4 +98,4 @@ struct formatter<counters::MetricMap>
return fmt::format_to(ctx.out(), "{}", out);
}
};
} // namespace fmt
} // namespace fmt
@@ -7,13 +7,13 @@
namespace
{
auto
loadTestData(std::unordered_map<std::string, std::vector<std::vector<std::string>>> map)
loadTestData(const std::unordered_map<std::string, std::vector<std::vector<std::string>>>& map)
{
std::unordered_map<std::string, std::vector<counters::Metric>> ret;
for(auto& [gfx, dataMap] : map)
for(const auto& [gfx, dataMap] : map)
{
auto& metric_vec = ret.emplace(gfx, std::vector<counters::Metric>{}).first->second;
for(auto& data_vec : dataMap)
for(const auto& data_vec : dataMap)
{
metric_vec.emplace_back(
data_vec.at(0), data_vec.at(1), data_vec.at(2), data_vec.at(4), data_vec.at(3));
@@ -23,7 +23,7 @@ loadTestData(std::unordered_map<std::string, std::vector<std::vector<std::string
}
} // namespace
TEST(MetricsTest, BaseMetricLoad)
TEST(metrics, base_load)
{
auto x = counters::getBaseHardwareMetrics();
auto test_data = loadTestData(basic_gfx908);
@@ -32,11 +32,11 @@ TEST(MetricsTest, BaseMetricLoad)
EXPECT_EQ(fmt::format("{}", x["gfx908"]), fmt::format("{}", test_data["gfx908"]));
}
// Checks that the derived metrics loaded for gfx908 format to the same text as the
// reference data built from derrived_gfx908.
TEST(metrics, derived_load)
{
    auto x         = counters::getDerivedHardwareMetrics();
    auto test_data = loadTestData(derrived_gfx908);
    // both maps must contain the architecture before their entries are compared
    ASSERT_EQ(x.count("gfx908"), 1);
    ASSERT_EQ(test_data.count("gfx908"), 1);
    EXPECT_EQ(fmt::format("{}", x["gfx908"]), fmt::format("{}", test_data["gfx908"]));
}
+2 -2
View File
@@ -1,5 +1,5 @@
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp agent.cpp)
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp agent.hpp)
set(ROCPROFILER_LIB_HSA_SOURCES hsa.cpp)
set(ROCPROFILER_LIB_HSA_HEADERS hsa.hpp defines.hpp types.hpp utils.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HSA_SOURCES}
${ROCPROFILER_LIB_HSA_HEADERS})
-499
View File
@@ -1,499 +0,0 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "agent.hpp"
#include <glog/logging.h>
#include <filesystem>
#include <fstream>
#include <optional>
#include <sstream>
#include <string>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>
#include "lib/common/utility.hpp"
namespace fs = std::filesystem;
namespace rocprofiler
{
namespace hsa
{
namespace
{
// Maps every GPU topology node to a CPU node it has an io_link to.
//
// The topology directory holds one sub-directory per node, named with the node id. A node
// whose "gpu_id" file contains a value greater than zero is treated as a GPU. The smallest
// GPU node id splits the id range: node ids below it are considered CPU nodes. For every
// node at or above that id, each "io_links/<N>/properties" file is scanned for a
// "node_to <id>" line; when <id> is a CPU node the mapping node -> <id> is recorded
// (the last matching link visited wins).
//
// @param topology_path directory to scan; defaults to the kfd topology in sysfs
// @return map from GPU node id to CPU node id. Empty when the directory does not exist or
//         contains no GPU node. Entries whose names or values are not integers are skipped.
std::unordered_map<long long, long long>
get_gpu_nodes_near_cpu(const std::string& topology_path = "/sys/class/kfd/kfd/topology/nodes")
{
    namespace stdfs = std::filesystem;

    // lenient integer parse (leading digits are enough, like std::stoll) that reports
    // failure through the return value instead of throwing
    auto to_integer = [](const std::string& text) -> std::optional<long long> {
        auto      stream = std::istringstream{text};
        long long value  = 0;
        if(!(stream >> value)) return std::nullopt;
        return value;
    };

    auto gpu_to_cpu = std::unordered_map<long long, long long>{};
    auto ec         = std::error_code{};
    // no kfd topology (e.g. driver not loaded) simply means there is nothing to map
    if(!stdfs::is_directory(topology_path, ec)) return gpu_to_cpu;

    // pass 1: remember every node directory and find the smallest GPU node id
    auto node_dirs      = std::vector<std::pair<long long, stdfs::path>>{};
    auto first_gpu_node = std::optional<long long>{};
    for(const auto& entry : stdfs::directory_iterator{topology_path})
    {
        const auto node_id = to_integer(entry.path().filename().string());
        if(!node_id) continue;
        node_dirs.emplace_back(*node_id, entry.path());

        auto      gpu_id_file = std::ifstream{entry.path() / "gpu_id"};
        long long gpu_id      = 0;
        if(!(gpu_id_file >> gpu_id) || gpu_id <= 0) continue;
        if(!first_gpu_node || *node_id < *first_gpu_node) first_gpu_node = *node_id;
    }
    if(!first_gpu_node) return gpu_to_cpu;

    // pass 2: follow the io_links of every node in the GPU id range
    for(const auto& [node_id, node_dir] : node_dirs)
    {
        if(node_id < *first_gpu_node) continue;

        const auto links_dir = node_dir / "io_links";
        if(!stdfs::is_directory(links_dir, ec)) continue;

        for(const auto& link : stdfs::directory_iterator{links_dir})
        {
            auto properties = std::ifstream{link.path() / "properties"};
            auto line       = std::string{};
            // looping on getline() itself avoids handling the final line twice
            while(std::getline(properties, line))
            {
                auto      fields = std::istringstream{line};
                auto      key    = std::string{};
                long long target = 0;
                if(!(fields >> key) || key != "node_to") continue;
                if(!(fields >> target)) continue;
                if(target < *first_gpu_node) gpu_to_cpu[node_id] = target;
            }
        }
    }

    return gpu_to_cpu;
}
// Shared implementation of the hsa_amd_agent_iterate_memory_pools() callbacks.
//
// Checks whether `pool` has the HSA_AMD_SEGMENT_GLOBAL segment. If kern_arg is true the
// pool must additionally have HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT set; if kern_arg
// is false the pool must NOT have that flag.
//
// `data` must point to a std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>: the API
// table used for the queries and the location that receives the matching pool.
//
// Returns HSA_STATUS_INFO_BREAK (stopping the iteration) once a matching pool has been
// stored, HSA_STATUS_SUCCESS when this pool does not match, and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when `data` is null. A failing pool query is fatal.
hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg)
{
    if(!data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

    auto [api_ptr, pool_ptr] =
        *static_cast<std::pair<const AmdExtTable*, hsa_amd_memory_pool_t*>*>(data);

    // Every non-success status is treated as fatal. Comparing only against the generic
    // HSA_STATUS_ERROR would let the more specific error codes through and the value
    // below would then be used without ever having been written by the query.
    hsa_amd_segment_t segment{};
    LOG_IF(FATAL,
           api_ptr->hsa_amd_memory_pool_get_info_fn(
               pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment) != HSA_STATUS_SUCCESS)
        << "Could not get pool segment";
    if(HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;

    uint32_t flag = 0;
    LOG_IF(FATAL,
           api_ptr->hsa_amd_memory_pool_get_info_fn(
               pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag) != HSA_STATUS_SUCCESS)
        << "Could not get flag value";

    // skip pools whose kernarg capability is the opposite of what was requested
    uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
    if((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg))
    {
        return HSA_STATUS_SUCCESS;
    }

    *(pool_ptr) = pool;
    return HSA_STATUS_INFO_BREAK;
}
// Callback for hsa_amd_agent_iterate_memory_pools(): selects a pool that is
// HSA_AMD_SEGMENT_GLOBAL and does NOT carry HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
hsa_status_t
FindStandardPool(hsa_amd_memory_pool_t pool, void* data)
{
    constexpr bool want_kernarg_pool = false;
    return FindGlobalPool(pool, data, want_kernarg_pool);
}
// Callback for hsa_amd_agent_iterate_memory_pools(): selects a pool that is
// HSA_AMD_SEGMENT_GLOBAL and DOES carry HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
hsa_status_t
FindKernArgPool(hsa_amd_memory_pool_t pool, void* data)
{
    constexpr bool want_kernarg_pool = true;
    return FindGlobalPool(pool, data, want_kernarg_pool);
}
void
init_cpu_pool(const AmdExtTable& api, AgentInfo& cpu_agent)
{
CHECK(!cpu_agent.isGpu());
auto params = std::make_pair(&api, &cpu_agent.cpu_pool);
auto status =
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindStandardPool, &params);
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: Command Buffer Pool is not initialized";
params.second = &cpu_agent.kernarg_pool;
status =
api.hsa_amd_agent_iterate_memory_pools_fn(cpu_agent.getAgent(), FindKernArgPool, &(params));
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: Output Buffer Pool is not initialized";
}
void
init_gpu_pool(const AmdExtTable& api, AgentInfo& agent_info)
{
CHECK(agent_info.isGpu());
auto params = std::make_pair(&api, &agent_info.gpu_pool);
auto status =
api.hsa_amd_agent_iterate_memory_pools_fn(agent_info.getAgent(), FindStandardPool, &params);
LOG_IF(FATAL, status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK)
<< "Error: GPU Pool is not initialized";
}
} // namespace
const std::vector<AgentInfo>&
all_agents()
{
static std::shared_ptr<const std::vector<AgentInfo>> agents = AgentInfo::getAgents(
{.hsa_iterate_agents_fn = hsa_iterate_agents, .hsa_agent_get_info_fn = hsa_agent_get_info},
{.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info,
.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools,
.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate,
.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free,
.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access});
return *agents;
}
// Enumerates all HSA agents through `api`, builds one AgentInfo per agent, and initializes
// the memory pools of each through `ext_api`.
//
// CPU agents get their NUMA node, cpu_pool and kernarg_pool. GPU agents get their driver
// node id (as index), NUMA node and gpu_pool, and afterwards inherit the agent handle,
// cpu_pool and kernarg_pool of the CPU agent that get_gpu_nodes_near_cpu() associates with
// their NUMA node. Any failing query or a GPU without an associated CPU agent is fatal.
//
// @return shared, read-only vector holding one AgentInfo per enumerated agent
std::shared_ptr<const std::vector<AgentInfo>>
AgentInfo::getAgents(const CoreApiTable& api, const AmdExtTable& ext_api)
{
    std::vector<hsa_agent_t>                agents;
    std::shared_ptr<std::vector<AgentInfo>> agent_info_ptr =
        std::make_shared<std::vector<AgentInfo>>();
    auto& agent_info = *agent_info_ptr;

    // collect every agent handle reported by the runtime
    api.hsa_iterate_agents_fn(
        [](hsa_agent_t agent, void* data) {
            CHECK_NOTNULL(static_cast<std::vector<hsa_agent_t>*>(data))->emplace_back(agent);
            return HSA_STATUS_SUCCESS;
        },
        &agents);

    auto near_gpu_map = get_gpu_nodes_near_cpu();

    std::unordered_map<int64_t, AgentInfo*> cpu_id_to_agent;
    // Reserve is required to prevent reallocation (which breaks cpu_id_to_agent)
    agent_info.reserve(agents.size());
    for(auto& agent : agents)
    {
        auto& new_agent = agent_info.emplace_back(agent, api);
        if(!new_agent.isGpu())
        {
            uint32_t cpu_numa_node_id;
            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) !=
                       HSA_STATUS_SUCCESS)
                << "Could not fetch numa info";
            new_agent.setNumaNode(cpu_numa_node_id);
            cpu_id_to_agent[cpu_numa_node_id] = &new_agent;
            init_cpu_pool(ext_api, new_agent);
        }
        else
        {
            uint32_t node_id;
            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(
                       agent,
                       static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
                       &node_id) != HSA_STATUS_SUCCESS)
                << "Could not fetch driver node id";
            new_agent.setIndex(node_id);
            LOG_IF(FATAL,
                   api.hsa_agent_get_info_fn(agent,
                                             static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NODE),
                                             &node_id) != HSA_STATUS_SUCCESS)
                << "Could not fetch numa info";
            new_agent.setNumaNode(node_id);
            init_gpu_pool(ext_api, new_agent);
        }
    }

    // Separate for loop to allow cpu_id_to_agent to populate (in case CPUs are not always the
    // first NUMA nodes)
    for(auto& agent : agent_info)
    {
        if(agent.isGpu())
        {
            auto* near_gpu = common::get_val(near_gpu_map, agent.getNumaNode());
            // exactly one replacement field: a second "{}" without a matching argument is a
            // format error
            LOG_IF(FATAL, !near_gpu) << fmt::format("No CPU Agent near GPU Agent: {}", agent);
            auto* id_to_agent = common::get_val(cpu_id_to_agent, *near_gpu);
            LOG_IF(FATAL, !id_to_agent) << fmt::format("Cannot convert id to agent: {}", *near_gpu);
            agent.setNearCpuAgent((*id_to_agent)->getAgent());
            agent.cpu_pool     = (*id_to_agent)->cpu_pool;
            agent.kernarg_pool = (*id_to_agent)->kernarg_pool;
        }
    }

    return agent_info_ptr;
}
// Queries the properties of `agent` through the function pointers in `table`.
//
// The device type and name are read for every agent; gfxip_ is the name with its last two
// characters removed. For GPU agents the wavefront size, queue size, compute-unit layout and
// PCI location are read as well, and the per-shader-array / per-SIMD values are derived
// from them. A failed device-type, hardware-configuration or PCI query is fatal.
AgentInfo::AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table)
: handle_(agent.handle)
, agent_(agent)
{
    if(table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info failed";
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_);

    // Drop the last two characters of the name. The subtraction is guarded because
    // strlen() returns an unsigned value: for a name shorter than two characters
    // "strlen(name_) - 2" would wrap around instead of yielding an empty label.
    const size_t name_len        = strlen(name_);
    const size_t stem_len        = (name_len > 2) ? name_len - 2 : 0;
    const size_t gfxip_label_len = std::min(stem_len, sizeof(gfxip_) - 1);
    memcpy(gfxip_, name_, gfxip_label_len);
    gfxip_[gfxip_label_len] = '\0';

    if(type_ != HSA_DEVICE_TYPE_GPU)
    {
        return;
    }

    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
    table.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);

    table.hsa_agent_get_info_fn(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);

    if(table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
           &shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(
           agent,
           static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
           &waves_per_cu_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for gfxip hardware configuration failed";
    }

    // The divisors come from queries whose status is not checked above, so they may still
    // be zero; in that case the derived values keep their zero defaults rather than
    // triggering an integer division by zero.
    const uint32_t shader_array_count = se_num_ * shader_arrays_per_se_;
    if(shader_array_count != 0) compute_units_per_sh_ = cu_num_ / shader_array_count;
    if(simds_per_cu_ != 0) wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;

    if(table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN),
                                   &pci_domain_) != HSA_STATUS_SUCCESS ||
       table.hsa_agent_get_info_fn(agent,
                                   static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID),
                                   &pci_location_id_) != HSA_STATUS_SUCCESS)
    {
        LOG(FATAL) << "hsa_agent_get_info for PCI info failed";
    }
}
uint64_t
AgentInfo::getIndex() const
{
return index_;
}
hsa_device_type_t
AgentInfo::getType() const
{
return type_;
}
uint64_t
AgentInfo::getHandle() const
{
return handle_;
}
const std::string_view
AgentInfo::getName() const
{
return name_;
}
std::string
AgentInfo::getGfxip() const
{
return std::string(gfxip_);
}
uint32_t
AgentInfo::getMaxWaveSize() const
{
return max_wave_size_;
}
uint32_t
AgentInfo::getMaxQueueSize() const
{
return max_queue_size_;
}
uint32_t
AgentInfo::getCUCount() const
{
return cu_num_;
}
uint32_t
AgentInfo::getSimdCountPerCU() const
{
return simds_per_cu_;
}
uint32_t
AgentInfo::getShaderEngineCount() const
{
return se_num_;
}
uint32_t
AgentInfo::getShaderArraysPerSE() const
{
return shader_arrays_per_se_;
}
uint32_t
AgentInfo::getMaxWavesPerCU() const
{
return waves_per_cu_;
}
uint32_t
AgentInfo::getCUCountPerSH() const
{
return compute_units_per_sh_;
}
uint32_t
AgentInfo::getWaveSlotsPerSimd() const
{
return wave_slots_per_simd_;
}
uint32_t
AgentInfo::getPCIDomain() const
{
return pci_domain_;
}
uint32_t
AgentInfo::getPCILocationID() const
{
return pci_location_id_;
}
uint32_t
AgentInfo::getXccCount() const
{
return xcc_num_;
}
void
AgentInfo::setIndex(uint64_t index)
{
index_ = index;
}
void
AgentInfo::setType(hsa_device_type_t type)
{
type_ = type;
}
void
AgentInfo::setHandle(uint64_t handle)
{
handle_ = handle;
}
// Copies `name` into the fixed-size name buffer, truncating it when it does not fit.
// The buffer is always NUL-terminated afterwards.
void
AgentInfo::setName(const std::string& name)
{
    constexpr auto name_len = sizeof(name_) / sizeof(char);
    //
    //  char* strncpy(char* destination, const char* source, size_t num)
    //
    // If the end of the source string (which is signaled by a null-character) is found before num
    // characters have been copied, destination is padded with zeros until a total of num characters
    // have been written to it
    //
    // Copy name_len - 1 characters so that every byte in front of the terminator is
    // overwritten; copying one fewer would leave a character of a previous, longer name in
    // the second-to-last slot.
    strncpy(name_, name.c_str(), name_len - 1);
    // ensure always terminated
    name_[name_len - 1] = '\0';
}
void
AgentInfo::setNumaNode(uint32_t numa_node)
{
numa_node_ = numa_node;
}
uint32_t
AgentInfo::getNumaNode() const
{
return numa_node_;
}
void
AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent)
{
near_cpu_agent_ = near_cpu_agent;
}
hsa_agent_t
AgentInfo::getNearCpuAgent()
{
return near_cpu_agent_;
}
} // namespace hsa
} // namespace rocprofiler
-174
View File
@@ -1,174 +0,0 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include "fmt/core.h"
#include "fmt/ranges.h"
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "lib/common/utility.hpp"
namespace rocprofiler
{
namespace hsa
{
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
// Description of one HSA agent: identity (handle, name, gfxip label), hardware
// configuration values read from the runtime, topology information (index, NUMA node,
// associated CPU agent) and the memory pools selected for it. Move-only.
class AgentInfo
{
public:
    AgentInfo() = default;
    // Queries the properties of `agent` through the function pointers in `table`.
    AgentInfo(const hsa_agent_t agent, const ::CoreApiTable& table);

    uint64_t          getIndex() const;
    hsa_device_type_t getType() const;
    bool              isGpu() const { return getType() == HSA_DEVICE_TYPE_GPU; }
    bool              isCpu() const { return getType() == HSA_DEVICE_TYPE_CPU; }
    uint64_t          getHandle() const;
    // View over the internal name buffer; it refers to storage inside this object.
    const std::string_view getName() const;
    // Pointer to the internal, NUL-terminated name buffer.
    const char* getNameChar() const { return name_; }
    std::string getGfxip() const;
    uint32_t    getMaxWaveSize() const;
    uint32_t    getMaxQueueSize() const;
    uint32_t    getCUCount() const;
    uint32_t    getSimdCountPerCU() const;
    uint32_t    getShaderEngineCount() const;
    uint32_t    getShaderArraysPerSE() const;
    uint32_t    getMaxWavesPerCU() const;
    uint32_t    getCUCountPerSH() const;
    uint32_t    getWaveSlotsPerSimd() const;
    uint32_t    getPCIDomain() const;
    uint32_t    getPCILocationID() const;
    uint32_t    getXccCount() const;

    void setIndex(uint64_t index);
    void setType(hsa_device_type_t type);
    void setHandle(uint64_t handle);
    void setName(const std::string& name);

    void     setNumaNode(uint32_t numa_node);
    uint32_t getNumaNode() const;

    void        setNearCpuAgent(hsa_agent_t near_cpu_agent);
    hsa_agent_t getNearCpuAgent();

    hsa_agent_t getAgent() const { return agent_; }

    // Memory pools associated with this agent. Value-initialized so that an agent for which
    // no matching pool was found (or a default-constructed / moved instance) never carries
    // an indeterminate handle.
    hsa_amd_memory_pool_t cpu_pool     = {};
    hsa_amd_memory_pool_t kernarg_pool = {};
    hsa_amd_memory_pool_t gpu_pool     = {};

    // Builds one AgentInfo per agent reported through the given API tables.
    static std::shared_ptr<const std::vector<AgentInfo>> getAgents(const CoreApiTable&,
                                                                   const AmdExtTable&);

    // Keep move constructors (i.e. std::move())
    AgentInfo(AgentInfo&& other) noexcept = default;
    AgentInfo& operator=(AgentInfo&& other) noexcept = default;

    // Do not allow copying this class
    AgentInfo(const AgentInfo&) = delete;
    AgentInfo& operator=(const AgentInfo&) = delete;

private:
    uint64_t          index_  = 0;
    hsa_device_type_t type_   = HSA_DEVICE_TYPE_CPU;  // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
    uint64_t          handle_ = 0;
    char              name_[64]  = {'\0'};
    char              gfxip_[64] = {'\0'};
    uint32_t          max_wave_size_        = 0;
    uint32_t          max_queue_size_       = 0;
    uint32_t          cu_num_               = 0;
    uint32_t          simds_per_cu_         = 0;
    uint32_t          se_num_               = 0;
    uint32_t          shader_arrays_per_se_ = 0;
    uint32_t          waves_per_cu_         = 0;
    // CUs per SH/SA
    uint32_t compute_units_per_sh_ = 0;
    uint32_t wave_slots_per_simd_  = 0;
    // Number of XCCs on the GPU
    uint32_t xcc_num_ = 0;

    uint32_t pci_domain_      = 0;
    uint32_t pci_location_id_ = 0;

    uint32_t    numa_node_      = 0;
    hsa_agent_t near_cpu_agent_ = {};
    hsa_agent_t agent_          = {};
};
const std::vector<AgentInfo>&
all_agents();
} // namespace hsa
} // namespace rocprofiler
namespace fmt
{
template <>
struct formatter<rocprofiler::hsa::AgentInfo>
{
template <typename ParseContext>
constexpr auto parse(ParseContext& ctx)
{
return ctx.begin();
}
template <typename Ctx>
auto format(rocprofiler::hsa::AgentInfo const& agent, Ctx& ctx) const
{
auto device_type_name = [](auto dev) -> std::string_view {
switch(dev)
{
case HSA_DEVICE_TYPE_CPU: return "CPU";
case HSA_DEVICE_TYPE_GPU: return "GPU";
case HSA_DEVICE_TYPE_DSP: return "DSP";
}
return "UNKNOWN";
};
return fmt::format_to(
ctx.out(),
R"({{"index":"{}","type":"{}","handle":"{}","name":"{}","gfxip":"{}","MaxWaveSize":"{}","MaxQueueSize":"{}","CUCount":"{}","SimdCountPerCU":"{}","ShaderEngineCount":"{}","ShaderArraysPerSE":"{}","MaxWavesPerCU":"{}","CUCountPerSH":"{}","WaveSlotsPerSimd":"{}","PCIDomain":"{}","PCILocationID":"{}","XccCount":"{}"}})",
agent.getIndex(),
device_type_name(agent.getType()),
agent.getHandle(),
agent.getName(),
agent.getGfxip(),
agent.getMaxWaveSize(),
agent.getMaxQueueSize(),
agent.getCUCount(),
agent.getSimdCountPerCU(),
agent.getShaderEngineCount(),
agent.getShaderArraysPerSE(),
agent.getMaxWavesPerCU(),
agent.getCUCountPerSH(),
agent.getWaveSlotsPerSimd(),
agent.getPCIDomain(),
agent.getPCILocationID(),
agent.getXccCount());
}
};
} // namespace fmt
+22 -22
View File
@@ -100,28 +100,6 @@ get_forced_configure()
return _v;
}
// Initializes google logging exactly once per process, using the program name read from
// /proc/self/cmdline. Later calls are no-ops.
void
init_logging()
{
    static auto _once = std::once_flag{};
    std::call_once(_once, []() {
        // /proc/self/cmdline stores the arguments separated by NUL bytes, so argv[0] is
        // everything up to the first NUL. Extracting with operator>> would split on
        // whitespace instead and truncate a program path that contains spaces.
        auto get_argv0 = []() {
            auto ifs  = std::ifstream{"/proc/self/cmdline"};
            auto sarg = std::string{};
            if(ifs) std::getline(ifs, sarg, '\0');
            return sarg;
        };

        // static so that the pointer handed to InitGoogleLogging stays valid for the rest
        // of the process
        static auto argv0 = get_argv0();
        google::InitGoogleLogging(argv0.c_str());
        LOG(INFO) << "logging initialized";
    });
}
std::vector<std::string>
get_link_map()
{
@@ -397,6 +375,28 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
}
} // namespace
// Initializes google logging exactly once per process, using the program name read from
// /proc/self/cmdline. Later calls are no-ops.
void
init_logging()
{
    static auto _once = std::once_flag{};
    std::call_once(_once, []() {
        // /proc/self/cmdline stores the arguments separated by NUL bytes, so argv[0] is
        // everything up to the first NUL. Extracting with operator>> would split on
        // whitespace instead and truncate a program path that contains spaces.
        auto get_argv0 = []() {
            auto ifs  = std::ifstream{"/proc/self/cmdline"};
            auto sarg = std::string{};
            if(ifs) std::getline(ifs, sarg, '\0');
            return sarg;
        };

        // static so that the pointer handed to InitGoogleLogging stays valid for the rest
        // of the process
        static auto argv0 = get_argv0();
        google::InitGoogleLogging(argv0.c_str());
        LOG(INFO) << "logging initialized";
    });
}
uint32_t
get_client_offset()
{
+4
View File
@@ -54,6 +54,10 @@ namespace rocprofiler
{
namespace registration
{
// initialize google logging
void
init_logging();
// initialize the clients
void
initialize();
-16
View File
@@ -24,22 +24,6 @@
#include <rocprofiler/rocprofiler.h>
#include "lib/common/utility.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/context/domain.hpp"
#include "lib/rocprofiler/hsa/agent.hpp"
#include "lib/rocprofiler/hsa/hsa.hpp"
#include "lib/rocprofiler/registration.hpp"
#include <atomic>
#include <vector>
namespace
{
// Accepts any number of arguments of any type and deliberately does nothing with them.
template <typename... ArgsT>
auto
consume_args(ArgsT&&... /*ignored*/)
{
    return;
}
} // namespace
extern "C" {
rocprofiler_status_t
+2 -2
View File
@@ -11,10 +11,10 @@ include(GoogleTest)
#
# -------------------------------------------------------------------------------------- #
set(rocprofiler_lib_sources buffer.cpp)
set(rocprofiler_lib_sources agent.cpp buffer.cpp timestamp.cpp version.cpp)
add_executable(rocprofiler-lib-tests)
target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources})
target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources} details/agent.cpp)
target_link_libraries(
rocprofiler-lib-tests
PRIVATE rocprofiler::rocprofiler-static-library
+217
View File
@@ -0,0 +1,217 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/agent.h>
#include <rocprofiler/fwd.h>
#include <rocprofiler/registration.h>
#include "lib/rocprofiler/registration.hpp"
#include "lib/rocprofiler/tests/details/agent.hpp"
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <pthread.h>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <random>
#include <sstream>
#include <typeinfo>
// Pins the binary layout of rocprofiler_agent_t: the byte offset of every field and the
// total size of the struct are compared against fixed values, so that reordering, resizing
// or inserting a field anywhere but at the end makes this test fail.
TEST(rocprofiler_lib, agent_abi)
{
// message appended to every offset check below
constexpr auto msg = "ABI break. NEW FIELDS MAY ONLY BE ADDED AT END OF STRUCT";
EXPECT_EQ(offsetof(rocprofiler_agent_t, size), 0) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, id), 8) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, type), 16) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_cores_count), 20) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_count), 24) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks_count), 28) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, caches_count), 32) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links_count), 36) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, cpu_core_id_base), 40) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_id_base), 44) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_simd), 48) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, lds_size_in_kb), 52) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, gds_size_in_kb), 56) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_gws), 60) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, wave_front_size), 64) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_xcc), 68) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_count), 72) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, array_count), 76) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_shader_banks), 80) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_arrays_per_engine), 84) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_simd_array), 88) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, simd_per_cu), 92) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, max_slots_scratch_cu), 96) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, gfx_target_version), 100) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_id), 104) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, device_id), 106) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, location_id), 108) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, domain), 112) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, drm_render_minor), 116) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_engines), 120) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_xgmi_engines), 124) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_sdma_queues_per_engine), 128) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, num_cp_queues), 132) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_ccompute), 136) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, max_engine_clk_fcompute), 140) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, sdma_fw_version), 144) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, fw_version), 148) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, capability), 152) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, cu_per_engine), 156) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, max_waves_per_cu), 160) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, family_id), 164) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_size), 168) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_size), 172) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, local_mem_size), 176) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, hive_id), 184) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, gpu_id), 192) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, workgroup_max_dim), 200) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, grid_max_dim), 212) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, mem_banks), 224) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, caches), 232) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, io_links), 240) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, name), 248) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, vendor_name), 256) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, product_name), 264) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, model_name), 272) << msg;
EXPECT_EQ(offsetof(rocprofiler_agent_t, pc_sampling_configs), 280) << msg;
// Add test for offset of new field above this. Do NOT change any existing values!
// If a new field is added, increase this value by the size of the new field(s)
// The total size is checked last so that a field appended at the end shows up as a
// failure of this check alone.
EXPECT_EQ(sizeof(rocprofiler_agent_t), 296)
<< "ABI break. If you added a new field, make sure that this is the only new check that "
"failed. Please add a check for the new field at the offset and update this test to the "
"new size";
}
// Verifies that every agent reported by rocprofiler_query_available_agents() agrees, field by
// field, with what the HSA runtime reports for the same index (gathered through
// rocprofiler::test::get_info()).
TEST(rocprofiler_lib, agent)
{
    rocprofiler::registration::init_logging();

    // Dump rocminfo and the raw KFD topology properties up front to help diagnose failures
    auto info_ret = std::system("/usr/bin/rocminfo");
    EXPECT_EQ(info_ret, 0);

    auto sys_ret = std::system(
        "/bin/bash -c 'for i in $(find /sys/class/kfd/kfd/topology/nodes -maxdepth 2 -type f | "
        "grep properties | sort); do echo -e \"\n##### ${i} #####\n\"; cat ${i}; echo \"\"; done'");
    EXPECT_EQ(sys_ret, 0);

    auto agents = std::vector<const rocprofiler_agent_t*>{};

    // Callback: append every reported agent pointer to the vector passed as user data
    rocprofiler_available_agents_cb_t iterate_cb =
        [](const rocprofiler_agent_t** agents_arr, size_t num_agents, void* user_data) {
            auto* agents_v = static_cast<std::vector<const rocprofiler_agent_t*>*>(user_data);
            // EXPECT_EQ(num_agents, hsa_agents_v.size());
            for(size_t i = 0; i < num_agents; ++i)
            {
                const auto* agent = agents_arr[i];
                agents_v->emplace_back(agent);
            }
            return ROCPROFILER_STATUS_SUCCESS;
        };

    // `agents` is not const, so a single static_cast to void* is sufficient
    auto status = rocprofiler_query_available_agents(
        iterate_cb, sizeof(rocprofiler_agent_t), static_cast<void*>(&agents));
    EXPECT_EQ(status, ROCPROFILER_STATUS_SUCCESS);

    auto _rocm_info = rocprofiler::test::rocm_info{};

    // The ISA name strings stored in _rocm_info.isas are released with delete[]. Do this from a
    // scope guard so the memory is freed on every exit path, including the early return taken
    // when the ASSERT_EQ below fails.
    struct isa_name_guard
    {
        rocprofiler::test::rocm_info& info;

        ~isa_name_guard()
        {
            for(auto& itr : info.isas)
            {
                delete[] itr.name_str;
                itr.name_str = nullptr;
            }
        }
    } _isa_name_guard{_rocm_info};

    EXPECT_EQ(rocprofiler::test::get_info(_rocm_info), 0);

    auto& hsa_agents_v = _rocm_info.agents;
    ASSERT_EQ(agents.size(), hsa_agents_v.size());

    for(size_t i = 0; i < agents.size(); ++i)
    {
        const auto* agent = agents.at(i);

        // Identifies the agent in the message attached to every failed expectation below
        auto msg = fmt::format("name={}, model={}, gfx version={}, id={}, type={}",
                               agent->name,
                               agent->model_name,
                               agent->gfx_target_version,
                               agent->id.handle,
                               agent->type == ROCPROFILER_AGENT_TYPE_CPU ? "CPU" : "GPU");
        // std::cout << msg << std::endl;

        EXPECT_LT(i, hsa_agents_v.size()) << msg;
        if(i >= hsa_agents_v.size()) continue;

        auto* hsa_agent = &hsa_agents_v.at(i);

        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU)
        {
            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_CPU) << msg;
        }
        else if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            EXPECT_EQ(hsa_agent->device_type, HSA_DEVICE_GPU) << msg;
        }
        else
        {
            EXPECT_TRUE(false) << msg << " :: agent-type != CPU|GPU :: " << agent->type;
        }

        EXPECT_EQ(std::string_view{agent->name}, std::string_view{hsa_agent->name}) << msg;
        EXPECT_EQ(std::string_view{agent->vendor_name}, std::string_view{hsa_agent->vendor_name})
            << msg;
        EXPECT_EQ(std::string_view{agent->product_name},
                  std::string_view{hsa_agent->device_mkt_name})
            << msg;
        EXPECT_EQ(agent->simd_count, hsa_agent->compute_unit * hsa_agent->simds_per_cu) << msg;
        EXPECT_EQ(agent->cu_count, hsa_agent->compute_unit) << msg;
        EXPECT_EQ(agent->simd_per_cu, hsa_agent->simds_per_cu) << msg;
        EXPECT_EQ(agent->wave_front_size, hsa_agent->wavefront_size) << msg;
        EXPECT_EQ(agent->simd_arrays_per_engine, hsa_agent->shader_arrs_per_sh_eng) << msg;
        EXPECT_EQ(agent->max_waves_per_cu, hsa_agent->max_waves_per_cu) << msg;
        EXPECT_EQ(agent->num_shader_banks, hsa_agent->shader_engs) << msg;
        EXPECT_EQ(agent->workgroup_max_size, hsa_agent->workgroup_max_size) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.x, hsa_agent->workgroup_max_dim[0]) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.y, hsa_agent->workgroup_max_dim[1]) << msg;
        EXPECT_EQ(agent->workgroup_max_dim.z, hsa_agent->workgroup_max_dim[2]) << msg;
        EXPECT_EQ(agent->grid_max_size, hsa_agent->grid_max_size) << msg;
        EXPECT_EQ(agent->grid_max_dim.x, hsa_agent->grid_max_dim.x) << msg;
        EXPECT_EQ(agent->grid_max_dim.y, hsa_agent->grid_max_dim.y) << msg;
        EXPECT_EQ(agent->grid_max_dim.z, hsa_agent->grid_max_dim.z) << msg;

        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            // HSA lib doesn't set family ID for CPU-only but we do
            EXPECT_EQ(agent->family_id, hsa_agent->family_id) << msg;
        }

        EXPECT_EQ(agent->fw_version.ui32.uCode, hsa_agent->ucode_version) << msg;
        EXPECT_EQ(agent->sdma_fw_version.uCodeSDMA, hsa_agent->sdma_ucode_version) << msg;

        // Guard against division by zero when no shader engines are reported
        if(hsa_agent->shader_engs > 0)
        {
            EXPECT_EQ(agent->cu_per_engine, hsa_agent->compute_unit / hsa_agent->shader_engs)
                << msg;
        }
    }
    // ISA name strings are released by _isa_name_guard when it goes out of scope
}
@@ -0,0 +1,478 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "agent.hpp"
#include "lib/common/utility.hpp"
#include <glog/logging.h>
#include <filesystem>
#include <fstream>
#include <grp.h>
#include <hsa/hsa.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <stdio.h>
#include <sstream>
#include <string>
#include <vector>
// Variant of RET_IF_HSA_ERR intended for the status of hsa_init(): on failure it first calls
// CheckInitError() to print likely causes and then hands the status to RET_IF_HSA_ERR.
// `err` is evaluated exactly once, so a call expression such as hsa_init() may be passed
// directly without being re-invoked on the failure path. The do/while(false) wrapper makes the
// expansion a single statement that is safe inside unbraced if/else.
#define RET_IF_HSA_INIT_ERR(err)                                                                   \
    do                                                                                             \
    {                                                                                              \
        const hsa_status_t ret_if_hsa_init_err_status_ = (err);                                    \
        if(ret_if_hsa_init_err_status_ != HSA_STATUS_SUCCESS)                                      \
        {                                                                                          \
            CheckInitError();                                                                      \
            RET_IF_HSA_ERR(ret_if_hsa_init_err_status_);                                           \
        }                                                                                          \
    } while(false)
// Evaluates `err` exactly once. If the resulting status is not HSA_STATUS_SUCCESS, prints the
// file/line of the macro invocation together with the description from hsa_status_string()
// (or the raw status value in hex when no description is available) and returns the status
// from the enclosing function. Because the argument is captured in a local first, passing a
// call expression does not re-run the call on the failure path. The do/while(false) wrapper
// makes the expansion a single statement that is safe inside unbraced if/else.
#define RET_IF_HSA_ERR(err)                                                                        \
    do                                                                                             \
    {                                                                                              \
        const hsa_status_t ret_if_hsa_err_status_ = (err);                                         \
        if(ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS)                                           \
        {                                                                                          \
            char        err_val[12];                                                               \
            const char* err_str = nullptr;                                                         \
            if(hsa_status_string(ret_if_hsa_err_status_, &err_str) != HSA_STATUS_SUCCESS)          \
            {                                                                                      \
                snprintf(err_val,                                                                  \
                         sizeof(err_val),                                                          \
                         "%#x",                                                                    \
                         static_cast<uint32_t>(ret_if_hsa_err_status_));                           \
                err_str = err_val;                                                                 \
            }                                                                                      \
            printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__);                        \
            printf("Call returned %s\n", err_str);                                                 \
            return ret_if_hsa_err_status_;                                                         \
        }                                                                                          \
    } while(false)
// namespace fs = std::filesystem;
namespace rocprofiler
{
namespace test
{
namespace
{
// Fill `sys_info` with the system-wide attributes reported by hsa_system_get_info().
// Returns HSA_STATUS_SUCCESS when every query succeeded; otherwise the status of the first
// failing query is reported and returned by RET_IF_HSA_ERR.
hsa_status_t
AcquireSystemInfo(system_info_t* sys_info)
{
    // Runtime version: major component, then minor component
    auto status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &sys_info->major);
    RET_IF_HSA_ERR(status);

    status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor);
    RET_IF_HSA_ERR(status);

    // Frequency of the system timestamp
    status =
        hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_info->timestamp_frequency);
    RET_IF_HSA_ERR(status);

    // Longest duration a signal wait operation may take
    status = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, &sys_info->max_wait);
    RET_IF_HSA_ERR(status);

    // Byte order of the system
    status = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &sys_info->endianness);
    RET_IF_HSA_ERR(status);

    // Machine model
    status = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &sys_info->machine_model);
    RET_IF_HSA_ERR(status);

    return status;
}
// Fill `agent_i` with the attributes of `agent` reported by hsa_agent_get_info().
// Attributes that only apply to kernel-dispatch agents (wavefront size, workgroup/grid limits,
// ...) are queried only when HSA_AGENT_FEATURE_KERNEL_DISPATCH is set; the ISA handle is
// queried only for GPU devices. Fields that are not queried keep whatever value they had on
// entry. Returns HSA_STATUS_SUCCESS when every query succeeded; otherwise the status of the
// first failing query is reported and returned by RET_IF_HSA_ERR.
hsa_status_t
AcquireAgentInfoEntry(hsa_agent_t agent, agent_info_t* agent_i)
{
    hsa_status_t err = HSA_STATUS_SUCCESS;

    // Get agent name and vendor
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name);
    RET_IF_HSA_ERR(err);

    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, &agent_i->vendor_name);
    RET_IF_HSA_ERR(err);

    // Get device marketing name
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_PRODUCT_NAME),
                             &agent_i->device_mkt_name);
    RET_IF_HSA_ERR(err);

    // Get agent feature
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, &agent_i->agent_feature);
    RET_IF_HSA_ERR(err);

    // Get profile supported by the agent
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_i->agent_profile);
    RET_IF_HSA_ERR(err);

    // Get floating-point rounding mode
    err = hsa_agent_get_info(
        agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, &agent_i->float_rounding_mode);
    RET_IF_HSA_ERR(err);

    // Get max number of queues
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &agent_i->max_queue);
    RET_IF_HSA_ERR(err);

    // Get queue min size
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &agent_i->queue_min_size);
    RET_IF_HSA_ERR(err);

    // Get queue max size
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_i->queue_max_size);
    RET_IF_HSA_ERR(err);

    // Get queue type
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, &agent_i->queue_type);
    RET_IF_HSA_ERR(err);

    // Get agent node
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node);
    RET_IF_HSA_ERR(err);

    // Get device type
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_i->device_type);
    RET_IF_HSA_ERR(err);

    // The ISA handle is only queried for GPU devices
    if(HSA_DEVICE_TYPE_GPU == agent_i->device_type)
    {
        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_i->agent_isa);
        RET_IF_HSA_ERR(err);
    }

    // Get cache size
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, agent_i->cache_size);
    RET_IF_HSA_ERR(err);

    // Get chip id
    err = hsa_agent_get_info(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID), &agent_i->chip_id);
    RET_IF_HSA_ERR(err);

    // Get cacheline size
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CACHELINE_SIZE),
                             &agent_i->cacheline_size);
    RET_IF_HSA_ERR(err);

    // Get max clock frequency
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY),
                             &agent_i->max_clock_freq);
    RET_IF_HSA_ERR(err);

    // Internal driver node ID
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
                             &agent_i->internal_node_id);
    RET_IF_HSA_ERR(err);

    // Max number of watch points on memory address ranges to generate exception events
    err = hsa_agent_get_info(
        agent,
        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS),
        &agent_i->max_addr_watch_pts);
    RET_IF_HSA_ERR(err);

    // Get agent BDFID. The HSA AMD extension documents this attribute as uint32_t, so it is
    // read into a local of that width and then converted to the width of the bdf_id field.
    // Passing the address of a narrower field directly would let the runtime write past the
    // field into the members that follow it.
    uint32_t bdf_id = 0;
    err             = hsa_agent_get_info(
        agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID), &bdf_id);
    RET_IF_HSA_ERR(err);
    agent_i->bdf_id = static_cast<decltype(agent_i->bdf_id)>(bdf_id);

    // Get max memory clock
    // Not supported by hsa_agent_get_info
    //  err = hsa_agent_get_info(agent,
    //              (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY,
    //                                                      &agent_i->mem_max_freq);
    // RET_IF_HSA_ERR(err);

    // Get number of SIMDs per CU
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
                             &agent_i->simds_per_cu);
    RET_IF_HSA_ERR(err);

    // Get number of shader engines
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
                             &agent_i->shader_engs);
    RET_IF_HSA_ERR(err);

    // Get number of shader arrays per shader engine
    err = hsa_agent_get_info(
        agent,
        static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
        &agent_i->shader_arrs_per_sh_eng);
    RET_IF_HSA_ERR(err);

    // Get number of compute units
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
                             &agent_i->compute_unit);
    RET_IF_HSA_ERR(err);

    // Family id
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID),
                             &agent_i->family_id);
    RET_IF_HSA_ERR(err);

    // ucode version
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_UCODE_VERSION),
                             &agent_i->ucode_version);
    RET_IF_HSA_ERR(err);

    // SDMA ucode version
    err = hsa_agent_get_info(agent,
                             static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION),
                             &agent_i->sdma_ucode_version);
    RET_IF_HSA_ERR(err);

    // The remaining attributes are only queried for kernel-dispatch agents
    if((agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) != 0)
    {
        // Get flag of fast_f16 operation
        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &agent_i->fast_f16);
        RET_IF_HSA_ERR(err);

        // Get wavefront size
        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_i->wavefront_size);
        RET_IF_HSA_ERR(err);

        // Get max total number of work-items in a workgroup
        err = hsa_agent_get_info(
            agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &agent_i->workgroup_max_size);
        RET_IF_HSA_ERR(err);

        // Get max number of work-items of each dimension of a work-group
        err = hsa_agent_get_info(
            agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, &agent_i->workgroup_max_dim);
        RET_IF_HSA_ERR(err);

        // Get max number of a grid per dimension
        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &agent_i->grid_max_dim);
        RET_IF_HSA_ERR(err);

        // Get max total number of work-items in a grid
        err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &agent_i->grid_max_size);
        RET_IF_HSA_ERR(err);

        // Get max number of fbarriers per work group
        err = hsa_agent_get_info(
            agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &agent_i->fbarrier_max_size);
        RET_IF_HSA_ERR(err);

        // Get max number of waves per CU
        err = hsa_agent_get_info(agent,
                                 static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
                                 &agent_i->max_waves_per_cu);
        RET_IF_HSA_ERR(err);
    }

    return err;
}
// Fill `pool_i` with the attributes of memory pool `pool` reported by
// hsa_amd_memory_pool_get_info(). Returns HSA_STATUS_SUCCESS when every query succeeded;
// otherwise the status of the first failing query is reported and returned by RET_IF_HSA_ERR.
hsa_status_t
AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t* pool_i)
{
    // Global flags of the pool
    auto status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
    RET_IF_HSA_ERR(status);

    // Segment the pool belongs to
    status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_i->segment);
    RET_IF_HSA_ERR(status);

    // Size of the pool
    status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &pool_i->pool_size);
    RET_IF_HSA_ERR(status);

    // Runtime allocation properties: allowed, granule and alignment
    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &pool_i->alloc_allowed);
    RET_IF_HSA_ERR(status);

    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &pool_i->alloc_granule);
    RET_IF_HSA_ERR(status);

    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &pool_i->pool_alloc_alignment);
    RET_IF_HSA_ERR(status);

    // Whether the pool is accessible by all agents
    status = hsa_amd_memory_pool_get_info(
        pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pool_i->pl_access);
    RET_IF_HSA_ERR(status);

    return HSA_STATUS_SUCCESS;
}
// Memory-pool iteration callback (passed to hsa_amd_agent_iterate_memory_pools): appends a new
// pool_info_t to the rocm_info pointed to by `data` and fills it from `pool`.
hsa_status_t
get_pool_info(hsa_amd_memory_pool_t pool, void* data)
{
    auto* rocm   = static_cast<rocm_info*>(data);
    auto* entry  = &rocm->pools.emplace_back();
    auto  status = AcquirePoolInfo(pool, entry);
    RET_IF_HSA_ERR(status);
    return status;
}
// Fill `isa_i` with the attributes of `isa` reported by hsa_isa_get_info_alt().
// isa_i->name_str is allocated here with new[]; ownership passes to the caller, who must
// release it with delete[]. It is set even when a later query fails.
// Returns HSA_STATUS_SUCCESS when every query succeeded; otherwise the status of the first
// failing query is reported and returned by RET_IF_HSA_ERR.
hsa_status_t
AcquireISAInfo(hsa_isa_t isa, isa_info_t* isa_i)
{
    hsa_status_t err      = HSA_STATUS_SUCCESS;
    uint32_t     name_len = 0;

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len);
    RET_IF_HSA_ERR(err);

    // HSA documents HSA_ISA_INFO_NAME_LENGTH as not including the NUL terminator, so reserve
    // one extra byte and zero-initialize the buffer: name_str is then always a valid C string.
    // A failed new[] throws std::bad_alloc rather than returning nullptr, so no null check is
    // needed here.
    isa_i->name_str = new char[static_cast<size_t>(name_len) + 1]{};

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS, isa_i->mach_models);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(
        isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->def_rounding_modes);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(
        isa, HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, isa_i->base_rounding_modes);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION, &isa_i->fast_f16);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM, &isa_i->workgroup_max_dim);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE, &isa_i->workgroup_max_size);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM, &isa_i->grid_max_dim);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE, &isa_i->grid_max_size);
    RET_IF_HSA_ERR(err);

    err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE, &isa_i->fbarrier_max_size);
    RET_IF_HSA_ERR(err);

    return err;
}
// ISA iteration callback (passed to hsa_agent_iterate_isas): appends a new isa_info_t to the
// rocm_info pointed to by `data` and fills it from `isa`.
hsa_status_t
get_isa_info(hsa_isa_t isa, void* data)
{
    auto*       info  = static_cast<rocm_info*>(data);
    isa_info_t& isa_i = info->isas.emplace_back();
    isa_i.name_str    = nullptr;

    // Store the status before handing it to RET_IF_HSA_ERR: passing the call expression itself
    // to the macro can re-run AcquireISAInfo() on the failure path, re-allocating (and leaking)
    // name_str each time.
    const hsa_status_t err = AcquireISAInfo(isa, &isa_i);
    RET_IF_HSA_ERR(err);

    return HSA_STATUS_SUCCESS;
}
// Agent iteration callback (passed to hsa_iterate_agents): appends a new agent_info_t to the
// rocm_info pointed to by `data`, fills it from `agent`, and then records the agent's memory
// pools and ISAs in the same rocm_info.
hsa_status_t
AcquireAgentInfo(hsa_agent_t agent, void* data)
{
    auto*         info    = static_cast<rocm_info*>(data);
    agent_info_t& agent_i = info->agents.emplace_back();

    // Each status is stored in a local before being handed to RET_IF_HSA_ERR: passing the call
    // expression itself to the macro can re-run the call on the failure path, which for the
    // iteration calls below would append duplicate pool/ISA entries.
    hsa_status_t err = AcquireAgentInfoEntry(agent, &agent_i);
    RET_IF_HSA_ERR(err);

    err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, data);
    RET_IF_HSA_ERR(err);

    // HSA_STATUS_ERROR_INVALID_AGENT from the ISA iteration is deliberately tolerated
    // (NOTE(review): presumably returned for agents that expose no ISAs -- confirm)
    err = hsa_agent_iterate_isas(agent, get_isa_info, data);
    if(err != HSA_STATUS_ERROR_INVALID_AGENT)
    {
        RET_IF_HSA_ERR(err);
    }

    return HSA_STATUS_SUCCESS;
}
// Print hints about why HSA initialization may have failed:
//   1. whether `lsmod | grep amdgpu` produces any output (i.e. the amdgpu kernel module is
//      listed), and
//   2. whether a group named "video" is found in the group database.
// Output goes to stdout; nothing is returned.
void
CheckInitError()
{
    printf("ROCm initialization failed\n");

    // Check kernel module for ROCk is loaded
    FILE* fd = popen("lsmod | grep amdgpu", "r");
    if(fd == nullptr)
    {
        // popen itself failed, so the module state is unknown; carry on with the group check
        printf("Unable to run lsmod to check whether the ROCk module is loaded\n");
    }
    else
    {
        char         buf[16];
        const size_t nread = fread(buf, 1, sizeof(buf), fd);
        // always release the pipe (and reap the child) before any early return
        pclose(fd);
        if(nread == 0)
        {
            printf("ROCk module is NOT loaded, possibly no GPU devices\n");
            return;
        }
    }

    // Check for the "video" group
    // @note: Users who are not members of the "video" group cannot access DRM services
    // NOTE(review): this loop only verifies that a group named "video" exists in the group
    // database; it does not inspect the calling user's group membership -- confirm intent.
    bool found = false;
    setgrent();
    while(const struct group* grp = getgrent())
    {
        // strcmp stops at the terminator, so group names shorter than "video" are not over-read
        if(grp->gr_name != nullptr && strcmp(grp->gr_name, "video") == 0)
        {
            found = true;
            break;
        }
    }
    // close the group database opened by setgrent()/getgrent()
    endgrent();

    if(!found)
    {
        printf("User is not member of \"video\" group\n");
        return;
    }
}
} // namespace
// Gather all static information known to HSA about the target system into `info`.
// The Acquire-type functions make HSA calls to iterate through HSA objects and then perform
// HSA get_info calls to accumulate information about those objects.
//
// The HSA runtime is initialized on entry and, once initialization succeeded, is always shut
// down again before returning -- also when gathering the information fails.
//
// Returns HSA_STATUS_SUCCESS on success; otherwise the failing hsa_status_t value (converted
// to int) after the failure has been reported on stdout.
int
get_info(rocm_info& info)
{
    // Statuses are stored in locals before being handed to the RET_IF_* macros so that a
    // failing call is never re-invoked through macro argument re-evaluation.
    const hsa_status_t init_status = hsa_init();
    RET_IF_HSA_INIT_ERR(init_status);

    // This function will call HSA get_info functions to gather information
    // about the system.
    hsa_status_t status = AcquireSystemInfo(&info.system);
    if(status == HSA_STATUS_SUCCESS)
    {
        status = hsa_iterate_agents(AcquireAgentInfo, &info);
    }

    // Balance the successful hsa_init() above before reporting any failure
    const hsa_status_t shutdown_status = hsa_shut_down();

    RET_IF_HSA_ERR(status);
    RET_IF_HSA_ERR(shutdown_status);

    return HSA_STATUS_SUCCESS;
}
#undef RET_IF_HSA_ERR
} // namespace test
} // namespace rocprofiler
@@ -0,0 +1,149 @@
// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include "fmt/core.h"
#include "fmt/ranges.h"
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "lib/common/utility.hpp"
namespace rocprofiler
{
namespace test
{
// System-wide information obtained from hsa_system_get_info(). Every member is
// value-initialized, so a default-constructed object is all zeros.
struct system_info_t
{
    uint16_t            major{0};                // HSA_SYSTEM_INFO_VERSION_MAJOR
    uint16_t            minor{0};                // HSA_SYSTEM_INFO_VERSION_MINOR
    uint64_t            timestamp_frequency{0};  // HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY
    uint64_t            max_wait{0};             // HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT
    hsa_endianness_t    endianness{};            // HSA_SYSTEM_INFO_ENDIANNESS
    hsa_machine_model_t machine_model{};         // HSA_SYSTEM_INFO_MACHINE_MODEL
};
// Per-agent information obtained from hsa_agent_get_info(). Every member is value-initialized,
// so attributes that are not queried for a given agent stay zero/empty.
struct agent_info_t
{
    char                              name[64]{};             // HSA_AGENT_INFO_NAME
    char                              vendor_name[64]{};      // HSA_AGENT_INFO_VENDOR_NAME
    char                              device_mkt_name[64]{};  // HSA_AMD_AGENT_INFO_PRODUCT_NAME
    hsa_agent_feature_t               agent_feature{};        // HSA_AGENT_INFO_FEATURE
    hsa_profile_t                     agent_profile{};        // HSA_AGENT_INFO_PROFILE
    hsa_default_float_rounding_mode_t float_rounding_mode{};
    uint32_t                          max_queue{0};       // HSA_AGENT_INFO_QUEUES_MAX
    uint32_t                          queue_min_size{0};  // HSA_AGENT_INFO_QUEUE_MIN_SIZE
    uint32_t                          queue_max_size{0};  // HSA_AGENT_INFO_QUEUE_MAX_SIZE
    hsa_queue_type_t                  queue_type{};       // HSA_AGENT_INFO_QUEUE_TYPE
    uint32_t                          node{0};            // HSA_AGENT_INFO_NODE
    hsa_device_type_t                 device_type{};      // HSA_AGENT_INFO_DEVICE
    uint32_t                          cache_size[4]{};    // HSA_AGENT_INFO_CACHE_SIZE
    uint32_t                          chip_id{0};
    uint32_t                          cacheline_size{0};
    uint32_t                          max_clock_freq{0};
    uint32_t                          internal_node_id{0};  // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID
    uint32_t                          max_addr_watch_pts{0};
    uint32_t                          family_id{0};  // HSA_AMD_AGENT_INFO_ASIC_FAMILY_ID
    uint32_t                          ucode_version{0};
    uint32_t                          sdma_ucode_version{0};
    // HSA_AMD_AGENT_INFO_MEMORY_WIDTH is deprecated, so exclude
    // uint32_t mem_max_freq; Not supported by get_info
    uint32_t   compute_unit{0};  // HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT
    uint32_t   wavefront_size{0};
    uint32_t   workgroup_max_size{0};
    uint32_t   grid_max_size{0};
    uint32_t   fbarrier_max_size{0};
    uint32_t   max_waves_per_cu{0};
    uint32_t   simds_per_cu{0};            // HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU
    uint32_t   shader_engs{0};             // HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES
    uint32_t   shader_arrs_per_sh_eng{0};  // HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE
    hsa_isa_t  agent_isa{};                // HSA_AGENT_INFO_ISA
    hsa_dim3_t grid_max_dim{};             // HSA_AGENT_INFO_GRID_MAX_DIM
    uint16_t   workgroup_max_dim[3]{};     // HSA_AGENT_INFO_WORKGROUP_MAX_DIM
    uint16_t   bdf_id{0};                  // HSA_AMD_AGENT_INFO_BDFID
    bool       fast_f16{false};            // HSA_AGENT_INFO_FAST_F16_OPERATION
};
// Per-memory-pool information obtained from hsa_amd_memory_pool_get_info(). Every member is
// value-initialized, so a default-constructed object is all zeros/false.
struct pool_info_t
{
    uint32_t segment{0};               // HSA_AMD_MEMORY_POOL_INFO_SEGMENT
    size_t   pool_size{0};             // HSA_AMD_MEMORY_POOL_INFO_SIZE
    bool     alloc_allowed{false};     // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED
    size_t   alloc_granule{0};         // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE
    size_t   pool_alloc_alignment{0};  // HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT
    bool     pl_access{false};         // HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL
    uint32_t global_flag{0};           // HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS
};
// Per-ISA information obtained from hsa_isa_get_info_alt(). Every member is value-initialized.
// name_str is a raw pointer to a buffer allocated with new[]; whoever holds the final copy of
// the object is responsible for releasing it with delete[].
struct isa_info_t
{
    char*      name_str{nullptr};       // HSA_ISA_INFO_NAME
    uint32_t   workgroup_max_size{0};   // HSA_ISA_INFO_WORKGROUP_MAX_SIZE
    hsa_dim3_t grid_max_dim{};          // HSA_ISA_INFO_GRID_MAX_DIM
    uint64_t   grid_max_size{0};        // HSA_ISA_INFO_GRID_MAX_SIZE
    uint32_t   fbarrier_max_size{0};    // HSA_ISA_INFO_FBARRIER_MAX_SIZE
    uint16_t   workgroup_max_dim[3]{};  // HSA_ISA_INFO_WORKGROUP_MAX_DIM
    bool       def_rounding_modes[3]{};   // HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES
    bool       base_rounding_modes[3]{};  // HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES
    bool       mach_models[2]{};          // HSA_ISA_INFO_MACHINE_MODELS
    bool       profiles[2]{};             // HSA_ISA_INFO_PROFILES
    bool       fast_f16{false};           // HSA_ISA_INFO_FAST_F16_OPERATION
};
// Holder for cache information (name, level and size). Every member is value-initialized, so a
// default-constructed object has a null name and zero level/size.
struct cache_info_t
{
    char*    name_str{nullptr};
    uint8_t  level{0};
    uint32_t size{0};
};
// Aggregate of everything collected by get_info(): the system-wide information plus one entry
// per agent, memory pool and ISA. All members start out empty/zeroed.
struct rocm_info
{
    system_info_t             system{};
    std::vector<agent_info_t> agents{};
    std::vector<pool_info_t>  pools{};
    std::vector<isa_info_t>   isas{};
};
// Collect system, agent, memory pool and ISA information from the HSA runtime into `info`.
// Returns HSA_STATUS_SUCCESS on success, otherwise the hsa_status_t value of the failing HSA
// call converted to int.
// Each entry of info.isas carries a name_str buffer allocated with new[]; the caller is
// responsible for releasing it with delete[].
int
get_info(rocm_info& info);
} // namespace test
} // namespace rocprofiler
@@ -0,0 +1,41 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/fwd.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/utility.hpp"
#include <gtest/gtest.h>
// Checks that rocprofiler_get_timestamp() succeeds and that the value it produces lies strictly
// between two readings of rocprofiler::common::timestamp_ns() taken immediately before and
// immediately after the call.
TEST(rocprofiler_lib, timestamp)
{
    const auto              before = rocprofiler::common::timestamp_ns();
    rocprofiler_timestamp_t sampled{};
    const auto              status = rocprofiler_get_timestamp(&sampled);
    const auto              after  = rocprofiler::common::timestamp_ns();

    EXPECT_EQ(status, ROCPROFILER_STATUS_SUCCESS);

    // the three readings must be non-zero and strictly increasing
    EXPECT_GT(before, 0);
    EXPECT_GT(sampled, before);
    EXPECT_GT(after, sampled);
}
+53
View File
@@ -0,0 +1,53 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/fwd.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/utility.hpp"
#include "rocprofiler/version.h"
#include <gtest/gtest.h>
// Checks that rocprofiler_get_version() reports the compile-time ROCPROFILER_VERSION_* values,
// both when each component is requested on its own (nullptr for the others) and when all three
// components are requested in a single call.
TEST(rocprofiler_lib, version)
{
    using version_tuple_t = std::tuple<uint32_t, uint32_t, uint32_t>;

    auto expected = version_tuple_t(
        ROCPROFILER_VERSION_MAJOR, ROCPROFILER_VERSION_MINOR, ROCPROFILER_VERSION_PATCH);

    // one component per call, nullptr for the components that are not requested
    auto piecewise                       = version_tuple_t(0, 0, 0);
    auto& [pw_major, pw_minor, pw_patch] = piecewise;

    auto major_status = rocprofiler_get_version(&pw_major, nullptr, nullptr);
    auto minor_status = rocprofiler_get_version(nullptr, &pw_minor, nullptr);
    auto patch_status = rocprofiler_get_version(nullptr, nullptr, &pw_patch);

    EXPECT_EQ(major_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(minor_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(patch_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(piecewise, expected);

    // all three components in a single call
    auto combined                     = version_tuple_t(0, 0, 0);
    auto& [c_major, c_minor, c_patch] = combined;

    auto combined_status = rocprofiler_get_version(&c_major, &c_minor, &c_patch);

    EXPECT_EQ(combined_status, ROCPROFILER_STATUS_SUCCESS);
    EXPECT_EQ(combined, expected);
    EXPECT_EQ(combined, piecewise);
}
+1 -1
View File
@@ -20,4 +20,4 @@ gtest_add_tests(
TEST_LIST buffering-tests_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 120 LABELS "unittests")