a798a26227
* Agent information w/o using hsa-runtime
- remove lib/rocprofiler/hsa/agent.{hpp,cpp}
- update include/rocprofiler/agent.h
- basically all possible info from /sys/class/kfd/kfd/topology/nodes/*
* Print topology in rocprofiler_lib.agent test
- hack to help diagnose errors
* Update lib/rocprofiler/tests/details/agent.cpp
- use LOG_IF(WARNING, ...) instead of LOG_IF(FATAL, ...)
* Update lib/rocprofiler/tests/agent.cpp
- print rocminfo at beginning of test
- fix comparison of agent handle
- misc other checks
* Update lib/rocprofiler/agent.cpp
- handle unreadable /sys/class/kfd/kfd/topology/nodes/<N>/properties file
* Update lib/tests/buffering/CMakeLists.txt
- increase timeout to 120
- buffering.parallel will timeout when thread sanitizing is enabled
* Update cmake: rocprofiler-drm
- find drm headers and libraries
* Update include/rocprofiler/agent.h
- add family_id field
* Update lib/rocprofiler/agent.cpp
- parse /proc/cpuinfo for name, family, apicid, etc.
- read_topology uses unique pointers to cleanup memory allocations
- implement name and gfxip
* Update lib/rocprofiler/tests/agent.cpp
- improved failure message
- check name/gfxip
- remove check against hsa_agent_t.handle
- this value is dependent on the address of C++ class
* Update lib/rocprofiler/tests/details/agent.cpp
- tweak gfxip_ variable which is broken for CPU
* Update lib/rocprofiler/agent.cpp
- update string handling for name and gfxip
* Update lib/rocprofiler/tests/agent.cpp
- minor output tweak
* Update lib/rocprofiler/registration.{hpp,cpp}
- registration::init_logging() function
* Update lib/rocprofiler/agent.cpp
- fix hex handling of GFX step version
* Update lib/rocprofiler/tests/details/agent.cpp
- fix format string when nearest CPUs not found
* Update lib/rocprofiler/tests/CMakeLists.txt
- exclude details/agent.cpp from being parsed for gtest tests
* Update include/rocprofiler/fwd.h
- add ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI status
* Update lib/rocprofiler/tests/details/agent.{hpp,cpp}
- replace with slightly modified implementation of rocminfo
- primary change was not printing
* Update lib/rocprofiler/tests/agent.cpp
- update test to use rocminfo data
* Update lib/rocprofiler/agent.cpp
- add pc_sampling_configs
- return error on incompatible ABI
* Update counters and counters tests
- rename test names for consistency
- fixed incorrect spelling of derived
* Add lib/rocprofiler/tests/{timestamp,version}.cpp
- add timestamp and version tests for rocprofiler_get_timestamp and rocprofiler_get_version, respectively
* Update lib/rocprofiler/tests/agent
- fix double free of name_str from isa_info_t
* Update include/rocprofiler/agent.h
- comments for rocprofiler_agent_mem_bank_t
- add rocprofiler_dim3_t
- comments for rocprofiler_agent_t
- add new fields to rocprofiler_agent_t
- cu_count
- workgroup_max_size
- workgroup_max_dim
- grid_max_size
- grid_max_dim
- vendor_name
- product_name
- change prototype of rocprofiler_available_agents_cb_t to be const agent**
* Update lib/rocprofiler/agent.cpp
- set size field
- implement:
- product_name
- vendor_name
- workgroup_max_size
- workgroup_max_dim
- grid_max_size
- grid_max_dim
- cu_count
* Update lib/rocprofiler/tests/agent.cpp
- changes for const agent*
* Update samples/pc_sampling
- updates for const agent*
* Update lib/rocprofiler/agent.cpp
- fix ABI compatibility check
- return incompatible if tool agent is larger than our agent
* Update include/rocprofiler/agent.h
- doxygen comments
- make size field of rocprofiler_agent_t uint64_t for consistency
- add gpu_id via /sys/class/kfd/kfd/.../<idx>/gpu_id
- add model_name via /sys/class/kfd/kfd/.../<idx>/name
* Update lib/rocprofiler/agent.cpp
- add read_file function (vector of strings)
- support enum in read_property
- assign model_name and gpu_id fields
- remove unique_id
* Update lib/rocprofiler/tests/details/agent.*
- support family id, ucode_version, sdma_ucode_version
* Update lib/rocprofiler/tests/agent.cpp
- Add rocprofiler_lib.agent_abi test
- Verify family_id, ucode_version, sdma_ucode_version
203 строки
7.7 KiB
C++
203 строки
7.7 KiB
C++
// Vladimir: An example showing how a single user can use PC sampling
// on multiple GPU agents.
|
|
|
|
#include <rocprofiler/rocprofiler.h>
|
|
#include <string.h>
|
|
#include <cassert>
|
|
#include <cstddef>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <vector>
|
|
#include "common.h"
|
|
|
|
namespace
|
|
{
|
|
// GPU agents supporting some kind of PC sampling
|
|
std::vector<rocprofiler_agent_t> gpu_agents;
|
|
std::vector<rocprofiler_context_id_t> contexts;
|
|
std::vector<rocprofiler_buffer_id_t> buffer_ids;
|
|
|
|
/**
 * @brief Agent-query callback that collects every GPU agent exposing at least one
 *        PC sampling configuration.
 *
 * GPU agents whose `pc_sampling_configs.size` is zero are skipped without being
 * printed (the assumption is that a size of zero means PC sampling is not
 * supported). Every other agent is printed; GPU agents that pass the check are
 * additionally copied into the output vector.
 *
 * @param agents     Array of `num_agents` agent pointers.
 * @param num_agents Number of entries in `agents`.
 * @param data       Must point to a `std::vector<rocprofiler_agent_t>`; matching
 *                   agents are appended to it by value.
 * @return ROCPROFILER_STATUS_ERROR when `data` is null or when the output vector
 *         is still empty after all agents were visited;
 *         ROCPROFILER_STATUS_SUCCESS otherwise.
 */
rocprofiler_status_t
find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agents,
                                                size_t                      num_agents,
                                                void*                       data)
{
    // data is required
    if(!data) return ROCPROFILER_STATUS_ERROR;

    auto* _out_agents = static_cast<std::vector<rocprofiler_agent_t>*>(data);

    // Visit *all* agents. Returning from inside the loop after the first match
    // would leave the remaining GPU agents undiscovered.
    for(size_t i = 0; i < num_agents; i++)
    {
        const rocprofiler_agent_t* agent = agents[i];

        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            // Skip GPU agents not supporting PC sampling.
            // Vladimir: The assumption is that if a GPU agent does not support PC sampling,
            // the size is 0.
            if(agent->pc_sampling_configs.size == 0) continue;

            _out_agents->push_back(*agent);
        }

        printf("[%s] %s :: id=%zu, type=%i, num pc sample configs=%zu\n",
               __FUNCTION__,
               agent->name,
               agent->id.handle,
               agent->type,
               agent->pc_sampling_configs.size);
    }

    return !_out_agents->empty() ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR;
}
|
|
|
|
void
|
|
find_all_gpu_agents_supporting_pc_sampling()
|
|
{
|
|
// This function returns the all gpu agents supporting some kind of PC sampling
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_query_available_agents(&find_all_gpu_agents_supporting_pc_sampling_impl,
|
|
sizeof(rocprofiler_agent_t),
|
|
static_cast<void*>(&gpu_agents)),
|
|
"Failed to find GPU agents");
|
|
}
|
|
} // namespace
|
|
|
|
void
|
|
configure_host_trap_sampling(rocprofiler_context_id_t context_id,
|
|
rocprofiler_buffer_id_t buffer_id,
|
|
rocprofiler_agent_t gpu_agent)
|
|
{
|
|
// Vladimir: Does MI200 have only one configuration?
|
|
assert(gpu_agent.pc_sampling_configs.size == 1);
|
|
|
|
// Extract the configuration
|
|
auto host_trap_config = gpu_agent.pc_sampling_configs.data[0];
|
|
|
|
// The mean of min_interval and max_interval
|
|
auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2;
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id,
|
|
gpu_agent,
|
|
host_trap_config.method,
|
|
host_trap_config.unit,
|
|
interval,
|
|
buffer_id),
|
|
"Cannot create host-trap PC sampling service");
|
|
}
|
|
|
|
/**
 * @brief Return the first configuration in `configs` whose method is
 *        ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC.
 *
 * If no such configuration exists, a message is printed and the process
 * terminates via `exit(-1)`; the function does not return in that case.
 *
 * @param configs Array of configurations to search (`data` / `size`).
 * @return A copy of the first stochastic configuration.
 */
rocprofiler_pc_sampling_configuration_t
extract_stochastic_config(rocprofiler_pc_sampling_config_array_t* configs)
{
    auto* const first = configs->data;
    auto* const last  = first + configs->size;

    // Linear scan; the first stochastic entry wins.
    for(auto* candidate = first; candidate != last; ++candidate)
    {
        if(candidate->method != ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) continue;
        return *candidate;
    }

    // Reaching this point means the caller passed an array without a stochastic entry.
    printf("Improper use of the `extract_stochastic_config` function.");
    exit(-1);
}
|
|
|
|
void
|
|
configure_stochastic_sampling(rocprofiler_context_id_t context_id,
|
|
rocprofiler_buffer_id_t buffer_id,
|
|
rocprofiler_agent_t gpu_agent)
|
|
{
|
|
// Find the configuration matching stochastic sampling in cycles
|
|
rocprofiler_pc_sampling_configuration_t stochastic_config =
|
|
extract_stochastic_config(&gpu_agent.pc_sampling_configs);
|
|
|
|
// The mean of min_interval and max_interval
|
|
auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2;
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id,
|
|
gpu_agent,
|
|
stochastic_config.method,
|
|
stochastic_config.unit,
|
|
interval,
|
|
buffer_id),
|
|
"Cannot create stochastic PC sampling service");
|
|
}
|
|
|
|
int
|
|
main(int /*argc*/, char** /*argv*/)
|
|
{
|
|
if(!find_first_gpu_agent())
|
|
{
|
|
fprintf(stderr, "no gpu agents were found\n");
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
find_all_gpu_agents_supporting_pc_sampling();
|
|
|
|
if(gpu_agents.empty())
|
|
{
|
|
printf("No availabe gpu agents\n");
|
|
exit(-1);
|
|
}
|
|
|
|
// Vladimir: The relations I assumed:
|
|
// - a context per gpu agent
|
|
// - a buffer per context
|
|
// - a pc sampling service per buffer
|
|
// How about the following: Single context with mulitple buffers and PC sampling services?
|
|
// When starting the context, does it start all PC sampling services at once?
|
|
|
|
for(auto gpu_agent : gpu_agents)
|
|
{
|
|
// creating a context
|
|
rocprofiler_context_id_t context_id;
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n");
|
|
contexts.push_back(context_id);
|
|
|
|
// creating a buffer that will hold pc sampling information
|
|
rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD;
|
|
rocprofiler_buffer_id_t buffer_id;
|
|
ROCPROFILER_CALL(rocprofiler_create_buffer(context_id,
|
|
BUFFER_SIZE_BYTES,
|
|
WATERMARK,
|
|
drop_buffer_action,
|
|
rocprofiler_pc_sampling_callback,
|
|
nullptr,
|
|
&buffer_id),
|
|
"Cannot create pc sampling buffer");
|
|
buffer_ids.push_back(buffer_id);
|
|
|
|
if(gpu_agent.name == MI200_NAME)
|
|
configure_host_trap_sampling(context_id, buffer_id, gpu_agent);
|
|
else
|
|
configure_stochastic_sampling(context_id, buffer_id, gpu_agent);
|
|
|
|
// Starting the context that should trigger PC sampling
|
|
ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context");
|
|
}
|
|
|
|
// Running the applicaiton
|
|
run_HIP_app();
|
|
|
|
for(size_t i = 0; i < gpu_agents.size(); i++)
|
|
{
|
|
// Stop the context that should stop PC sampling?
|
|
ROCPROFILER_CALL(rocprofiler_stop_context(contexts[i]), "Cannot start PC sampling context");
|
|
// Explicit buffer flush, before destroying it
|
|
ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_ids[i]), "Cannot destroy buffer");
|
|
// Destroying the buffer
|
|
ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_ids[i]), "Cannot destroy buffer");
|
|
}
|
|
|
|
return 0;
|
|
}
|