7f631de401
* Update lib/rocprofiler/agent.{hpp,cpp}
- get_agents() function for internal access to agent pointers
* Update AgentCache
- make member variables and member functions clearly distinguish between the HSA agent and the rocprofiler agent
* Change ctor of AgentCache
* Update lib/rocprofiler/hsa/queue_controller.cpp
- QueueController::init uses agent::get_agent_cache
* Update lib/rocprofiler/hsa/agent_cache.*
- member function to get index
- operator== for rocprofiler_agent_t and hsa_agent_t
- removed hsa_iterate_agents from ctor (now in agent.cpp)
* Update lib/rocprofiler/agent.*
- construct_agent_cache function
- functions for rocprofiler agent <-> HSA agent
- functions for getting agent cache
* Update lib/rocprofiler/registration.cpp
- invoke construct_agent_cache when HSA table is received
* Update lib/rocprofiler/agent.cpp
- loosen failure conditions
- handle spurious duplicate entry warning
* Update lib/rocprofiler/agent.cpp
- improve read_map diagnostics
* Update lib/rocprofiler/agent.cpp
- avoid infinite loop in read_map
* Update lib/rocprofiler/agent.cpp
- handle empty kfd node properties file
* Update lib/rocprofiler/agent.cpp
- check for permissions to read a node properties file
* Update lib/rocprofiler/agent.cpp
- more checks on file readability
* Update lib/rocprofiler/tests/agent.cpp
- print virtual kfd topology
* Update lib/rocprofiler/tests/agent.cpp
- verify id.handle == hsa_agent internal node id
* Update lib/rocprofiler/tests/agent.cpp
- check node_id
- check location id
- check device id
- update abi test
* Update include/rocprofiler/agent.h
- add node_id field
- add reserved0 field to ensure new field increases struct size
* Update lib/rocprofiler/agent.cpp
- node_id instead of id.handle
* Update lib/rocprofiler/agent_cache.cpp
- node_id instead of id.handle
* Update samples/pc_sampling
- node_id for agent instead of id.handle
* Update lib/rocprofiler/buffer.cpp
- remove debug prints
129 řádky
5.0 KiB
C++
129 řádky
5.0 KiB
C++
#ifndef PC_SAMPLING_COMMON_H
|
|
#define PC_SAMPLING_COMMON_H
|
|
|
|
#include <rocprofiler/rocprofiler.h>
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <optional>
|
|
#include <string>
|
|
#include <string_view>
|
|
|
|
// Size, in bytes, used by the PC sampling sample when sizing its buffer.
constexpr size_t BUFFER_SIZE_BYTES = 4096;
// Watermark for that buffer: half of BUFFER_SIZE_BYTES.
constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2);
// Architecture name string ("gfx90a") associated with MI200 in this sample.
// constexpr (rather than plain const) guarantees constant initialization, so
// the value is usable in constant expressions and during static init.
constexpr std::string_view MI200_NAME = "gfx90a";
|
|
|
|
// Evaluates `result` exactly once and, when it is not
// ROCPROFILER_STATUS_SUCCESS, prints "<expression text> failed" to stdout.
// Execution continues either way; the status is not propagated.
// NOTE: `msg` is accepted for call-site readability but is currently ignored.
// The body is a bare brace block (not do/while(0)) so existing call sites
// written with or without a trailing semicolon keep compiling.
#define ROCPROFILER_CALL(result, msg)                                                              \
    {                                                                                              \
        rocprofiler_status_t CHECKSTATUS = (result);                                               \
        if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS)                                              \
        {                                                                                          \
            puts(#result " failed");                                                               \
        }                                                                                          \
    }
|
|
|
|
// We might want to test calls that are expected to fail,
// e.g. calling `rocprofiler_configure_pc_sampling_service`
// after a previous initialization.
|
|
// Inverse check: evaluates `result` exactly once and, when it unexpectedly
// IS ROCPROFILER_STATUS_SUCCESS, prints "<expression text> succeeded" to
// stdout. Execution continues either way.
// NOTE: `msg` is accepted for call-site readability but is currently ignored.
// The body is a bare brace block (not do/while(0)) so existing call sites
// written with or without a trailing semicolon keep compiling.
#define ROCPROFILER_CALL_FAILS(result, msg)                                                        \
    {                                                                                              \
        rocprofiler_status_t CHECKSTATUS = (result);                                               \
        if(CHECKSTATUS == ROCPROFILER_STATUS_SUCCESS)                                              \
        {                                                                                          \
            puts(#result " succeeded");                                                            \
        }                                                                                          \
    }
|
|
|
|
/// Agent-query callback: logs each agent it visits and copies the first one
/// whose type is ROCPROFILER_AGENT_TYPE_GPU into the caller's storage.
///
/// @param agents      array of `num_agents` agent pointers
/// @param num_agents  number of entries in `agents`
/// @param data        required; must point to a `rocprofiler_agent_t` that
///                    receives a copy of the selected agent
/// @return ROCPROFILER_STATUS_SUCCESS once a GPU agent has been copied out;
///         ROCPROFILER_STATUS_ERROR if `data` or `agents` is null or no GPU
///         agent is present.
static rocprofiler_status_t
find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, void* data)
{
    // data is required (it is the output slot); agents must be readable
    if(!data || !agents) return ROCPROFILER_STATUS_ERROR;

    auto* _out_agent = static_cast<rocprofiler_agent_t*>(data);

    // walk the agents in order, stopping at the first GPU
    for(size_t i = 0; i < num_agents; i++)
    {
        const rocprofiler_agent_t* agent = agents[i];

        // every visited agent is logged, whether or not it is selected
        printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n",
               __func__,
               agent->name,
               agent->node_id,
               agent->type,
               agent->num_pc_sampling_configs);

        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            *_out_agent = *agent;
            return ROCPROFILER_STATUS_SUCCESS;
        }
    }

    // no GPU agent among the ones provided
    return ROCPROFILER_STATUS_ERROR;
}
|
|
|
|
static std::optional<rocprofiler_agent_t>
|
|
find_first_gpu_agent()
|
|
{
|
|
// This function returns the first gpu agent it encounters.
|
|
// TODO: write the better function querying information about the agent,
|
|
// and return if the agent is MI200.
|
|
rocprofiler_agent_t gpu_agent;
|
|
|
|
auto status = rocprofiler_query_available_agents(
|
|
&find_first_gpu_agent_impl, sizeof(rocprofiler_agent_t), static_cast<void*>(&gpu_agent));
|
|
|
|
if(status != ROCPROFILER_STATUS_SUCCESS) return std::nullopt;
|
|
|
|
return gpu_agent;
|
|
}
|
|
|
|
static void
|
|
rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
|
rocprofiler_buffer_id_t /*buffer_id*/,
|
|
rocprofiler_record_header_t** headers,
|
|
size_t num_headers,
|
|
void* /*data*/,
|
|
uint64_t drop_count)
|
|
{
|
|
// Vladimir: I am not sure if this is the right way of iterating over PC sampling records.
|
|
printf(
|
|
"The number of delivered samples is: %zu, while the number of dropped samples is: %lu.\n",
|
|
num_headers,
|
|
drop_count);
|
|
|
|
for(size_t i = 0; i < num_headers; i++)
|
|
{
|
|
auto* cur_header = headers[i];
|
|
if(cur_header->category == ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING)
|
|
{
|
|
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_t*>(cur_header->payload);
|
|
printf("--- pc: %lx, dispatch_id: %lx, timestamp: %lu, hardware_id: %lu\n",
|
|
pc_sample->pc,
|
|
pc_sample->dispatch_id,
|
|
pc_sample->timestamp,
|
|
pc_sample->hardware_id);
|
|
// Vladimir: How to parse the remaining part of the `rocprofiler_pc_sampling_record_t`
|
|
// struct?
|
|
}
|
|
}
|
|
// Vladimr: We might want to add somewhere in the documentation that headars actually contain PC
|
|
// samples.
|
|
}
|
|
|
|
/// Placeholder for the HIP workload that the PC sampling sample is meant to
/// profile. Currently does nothing.
static void
run_HIP_app()
{
    // TODO: provide a simple HIP app
}
|
|
|
|
#endif
|