SWDEV-297574: Support for pcie counters
Change-Id: I4a662c43a9d0cf883f336574baa09fc33b78b9af
[ROCm/rocprofiler commit: 3a639543e7]
This commit is contained in:
committed by
Ammar ELWazir
parent
ff80dd4dfa
commit
ef8eb4fef9
@@ -479,7 +479,11 @@ typedef enum {
|
||||
/**
|
||||
* Represents SPM records
|
||||
*/
|
||||
ROCPROFILER_SPM_RECORD = 4
|
||||
ROCPROFILER_SPM_RECORD = 4,
|
||||
/**
|
||||
* Represents Counters sampler records
|
||||
*/
|
||||
ROCPROFILER_COUNTERS_SAMPLER_RECORD = 5
|
||||
} rocprofiler_record_kind_t;
|
||||
|
||||
/**
|
||||
@@ -1789,7 +1793,11 @@ typedef enum {
|
||||
/**
|
||||
* HIP/HSA/ROCTX/SYS Trace.
|
||||
*/
|
||||
ROCPROFILER_API_TRACE = 6
|
||||
ROCPROFILER_API_TRACE = 6,
|
||||
/**
|
||||
* Sampled Counters
|
||||
*/
|
||||
ROCPROFILER_COUNTERS_SAMPLER = 7
|
||||
} rocprofiler_filter_kind_t;
|
||||
|
||||
/**
|
||||
@@ -1895,6 +1903,69 @@ typedef struct {
|
||||
|
||||
} rocprofiler_spm_parameter_t;
|
||||
|
||||
typedef enum{
|
||||
ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS = 0
|
||||
} rocprofiler_counters_sampler_counter_type_t;
|
||||
|
||||
typedef struct{
|
||||
char* name;
|
||||
rocprofiler_counters_sampler_counter_type_t type;
|
||||
} rocprofiler_counters_sampler_counter_input_t;
|
||||
|
||||
typedef struct{
|
||||
rocprofiler_counters_sampler_counter_type_t type;
|
||||
rocprofiler_record_counter_value_t value;
|
||||
} rocprofiler_counters_sampler_counter_output_t;
|
||||
|
||||
typedef struct{
|
||||
/**
|
||||
* Counters to profile
|
||||
*/
|
||||
rocprofiler_counters_sampler_counter_input_t* counters;
|
||||
/**
|
||||
* Counters count
|
||||
*/
|
||||
int counters_num;
|
||||
/**
|
||||
* Sampling rate (ms)
|
||||
*/
|
||||
uint32_t sampling_rate;
|
||||
/**
|
||||
* Total sampling duration (ms); time between sampling start/stop
|
||||
*/
|
||||
uint32_t sampling_duration;
|
||||
/**
|
||||
* Initial delay (ms)
|
||||
*/
|
||||
uint32_t initial_delay;
|
||||
/**
|
||||
* Preferred agents to collect counters from
|
||||
*/
|
||||
int gpu_agent_index;
|
||||
}rocprofiler_counters_sampler_parameters_t;
|
||||
|
||||
typedef struct{
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
/**
|
||||
* Agent Identifier to be used by the user to get the Agent Information using
|
||||
* ::rocprofiler_query_agent_info
|
||||
*/
|
||||
rocprofiler_agent_id_t gpu_id;
|
||||
/**
|
||||
* Counters, including identifiers to get counter information and Counters
|
||||
* values
|
||||
*/
|
||||
rocprofiler_counters_sampler_counter_output_t* counters;
|
||||
/**
|
||||
* Number of counter values
|
||||
*/
|
||||
uint32_t num_counters;
|
||||
}rocprofiler_record_counters_sampler_t;
|
||||
|
||||
/**
|
||||
* Filter Kind Data
|
||||
*/
|
||||
@@ -1915,6 +1986,10 @@ typedef union {
|
||||
* spm counters parameters
|
||||
*/
|
||||
rocprofiler_spm_parameter_t* spm_parameters;
|
||||
/**
|
||||
* sampled counters parameters
|
||||
*/
|
||||
rocprofiler_counters_sampler_parameters_t counters_sampler_parameters;
|
||||
} rocprofiler_filter_data_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -84,6 +84,16 @@ target_link_options(profiler_device_profiling PRIVATE "-Wl,--build-id=md5")
|
||||
add_dependencies(samples profiler_device_profiling)
|
||||
install(TARGETS profiler_device_profiling RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
|
||||
|
||||
## Build Counters Sampling example
|
||||
set_source_files_properties(counters_sampler/pcie_counters_example.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(pcie_counters_sampler counters_sampler/pcie_counters_example.cpp ${ROCPROFILER_UTIL_SRC_FILES})
|
||||
target_include_directories(pcie_counters_sampler PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/common)
|
||||
target_link_libraries(pcie_counters_sampler PRIVATE ${ROCPROFILER_TARGET} systemd amd_comgr)
|
||||
add_dependencies(samples pcie_counters_sampler)
|
||||
install(TARGETS pcie_counters_sampler RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
|
||||
|
||||
# ################################################################################################################
|
||||
|
||||
# ############################################################################################################################################
|
||||
# Tracer Samples
|
||||
# ############################################################################################################################################
|
||||
|
||||
@@ -285,6 +285,16 @@ void FlushPCSamplingRecord(
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
void FlushCountersSamplerRecord(
|
||||
const rocprofiler_record_counters_sampler_t *counters_sampler_record) {
|
||||
for (uint32_t i = 0; i < counters_sampler_record->num_counters; i++) {
|
||||
output_file << ",Counter_" << i << "("
|
||||
<< std::to_string(counters_sampler_record->counters[i].value.value) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
output_file << std::endl;
|
||||
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin < end) {
|
||||
@@ -308,9 +318,16 @@ int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofil
|
||||
FlushPCSamplingRecord(pc_sampling_record);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
case ROCPROFILER_COUNTERS_SAMPLER_RECORD: {
|
||||
const rocprofiler_record_counters_sampler_t *counters_sampler_record =
|
||||
reinterpret_cast<const rocprofiler_record_counters_sampler_t *>(begin);
|
||||
FlushCountersSamplerRecord(counters_sampler_record);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
std::cout <<"unknown record\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
#include "../common/common.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int* gpuMem;
|
||||
int counter_option = 0;
|
||||
|
||||
std::vector<std::string> pcie_counters = {
|
||||
"CI_PERF_slv_MemRd_Bandwidth0", "CI_PERF_slv_MemWr_Bandwidth0", "CI_PERF_slv_totalMemRdTx",
|
||||
"CI_PERF_slv_totalMemWrTx", "CI_PERF_slv_totalTx"};
|
||||
|
||||
if(argc > 1) {
|
||||
counter_option = atoi(argv[1]);
|
||||
}
|
||||
else{
|
||||
std::cout<< "Please provide one of the counter index options as argument:\n";
|
||||
for(int i = 0; i < pcie_counters.size(); i++){
|
||||
std::cout<< "[" << i << "]: " << pcie_counters[i] << std::endl;
|
||||
}
|
||||
std::cout<< "Example:\n ./pcie_counters_sampler 1\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
prepare();
|
||||
// Initialize the tools
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
|
||||
// Creating the session with given replay mode
|
||||
rocprofiler_session_id_t session_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
|
||||
|
||||
// Creating Output Buffer for the data
|
||||
rocprofiler_buffer_id_t buffer_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
WriteBufferRecords(record, end_record, session_id, buffer_id);
|
||||
},
|
||||
0x999999, &buffer_id));
|
||||
|
||||
// Counters Sampler Filter
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
|
||||
|
||||
rocprofiler_counters_sampler_counter_input_t counters_input[2] = {
|
||||
{.name = const_cast<char*>(pcie_counters[counter_option].c_str()),
|
||||
.type = ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS}};
|
||||
|
||||
uint32_t rate = 1000;
|
||||
uint32_t duration = 5000;
|
||||
|
||||
rocprofiler_counters_sampler_parameters_t cs_parameters = {.counters = counters_input,
|
||||
.counters_num = 1,
|
||||
.sampling_rate = rate,
|
||||
.sampling_duration = duration,
|
||||
.gpu_agent_index = 0};
|
||||
CHECK_ROCPROFILER(
|
||||
rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_SAMPLER,
|
||||
rocprofiler_filter_data_t{.counters_sampler_parameters = cs_parameters},
|
||||
0, &filter_id, property));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
|
||||
// Normal HIP Calls
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
|
||||
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
|
||||
|
||||
// KernelA and KernelB won't be profiled
|
||||
kernelCalls('A');
|
||||
kernelCalls('B');
|
||||
|
||||
std::cout << "Collecting samples for: " << pcie_counters[counter_option]
|
||||
<< " ; sampling rate: " << rate << " ms; duration: " << duration << " ms" << std::endl;
|
||||
// Activating the session
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
|
||||
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
|
||||
kernelCalls('C');
|
||||
kernelCalls('D');
|
||||
kernelCalls('E');
|
||||
kernelCalls('F');
|
||||
// Normal HIP Calls
|
||||
HIP_CALL(hipFree(gpuMem));
|
||||
|
||||
// allow sampler to run for 10 secs
|
||||
sleep(6);
|
||||
|
||||
// Deactivating session
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
|
||||
// Manual Flush user buffer request
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
|
||||
// Destroy sessions
|
||||
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
|
||||
|
||||
// Destroy all profiling related objects(User buffer, sessions, filters,
|
||||
// etc..)
|
||||
CHECK_ROCPROFILER(rocprofiler_finalize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -174,10 +174,12 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
|
||||
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
|
||||
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
|
||||
@@ -198,8 +200,10 @@ add_library(${ROCPROFILER_TARGET} SHARED
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${CORE_HSA_PACKETS_SRC_FILES}
|
||||
${CORE_HSA_QUEUES_SRC_FILES}
|
||||
|
||||
@@ -0,0 +1,189 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "mmio.h"
|
||||
#include <cstdint>
|
||||
#include "../../../utils/helper.h"
|
||||
#include "pcie_perfmon_registers_mi200.h"
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
namespace mmio {
|
||||
|
||||
void PrintFunctionPhase(const char* function_name, int phase){
|
||||
#if defined(DEBUG_TRACE)
|
||||
if (phase == 0)
|
||||
std::clog << function_name << "() START" << std::endl;
|
||||
else
|
||||
std::clog << function_name << "() END" << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char* function_name, int phase){
|
||||
#if defined(DEBUG_TRACE)
|
||||
if (phase == 0) {
|
||||
PrintFunctionPhase(function_name, phase);
|
||||
std::clog << "Old (index, data) : "<< std::hex << index_value << " " << data_value << std::endl;
|
||||
}
|
||||
else {
|
||||
std::clog << "New (index, data) : "<< std::hex << index_value << " " << data_value << std::endl;
|
||||
PrintFunctionPhase(function_name, phase);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
MMIO::MMIO(const Agent::AgentInfo& info) : agent_info_(&info), pci_memory_(nullptr), type_(DEFAULT_MMAP) {
|
||||
const auto pci_domain = agent_info_->getPCIDomain();
|
||||
const auto pci_location_id = agent_info_->getPCILocationID();
|
||||
|
||||
pci_device_ =
|
||||
pci_device_find_by_slot(pci_domain, pci_location_id >> 8, pci_location_id & 0xFF, 0);
|
||||
if (!pci_device_ || pci_device_probe(pci_device_)) fatal("failed to probe the GPU device\n");
|
||||
|
||||
// Look for a region between 256KB and 4096KB, 32-bit, non IO, and non prefetchable.
|
||||
for (size_t region = 0; region < sizeof(pci_device::regions) / sizeof(pci_device::regions[0]);
|
||||
++region)
|
||||
if (pci_device_->regions[region].is_64 == 0 &&
|
||||
pci_device_->regions[region].is_prefetchable == 0 &&
|
||||
pci_device_->regions[region].is_IO == 0 &&
|
||||
pci_device_->regions[region].size >= (256UL * 1024) &&
|
||||
pci_device_->regions[region].size <= (4096UL * 1024)) {
|
||||
pci_memory_size_ = pci_device_->regions[region].size;
|
||||
int err = pci_device_map_range(pci_device_, pci_device_->regions[region].base_addr,
|
||||
pci_device_->regions[region].size, PCI_DEV_MAP_FLAG_WRITABLE,
|
||||
(void**)&pci_memory_);
|
||||
if(err)
|
||||
fatal("failed to map the registers. Error code: %d\n", err);
|
||||
}
|
||||
|
||||
if (pci_memory_ == nullptr) fatal("could not find the pci memory address\n");
|
||||
|
||||
SetIndexDataRegisters(INDIRECT_REG_INDEX, INDIRECT_REG_DATA);
|
||||
}
|
||||
|
||||
MMIO::~MMIO() {
|
||||
if (pci_memory_)
|
||||
{
|
||||
int err = pci_device_unmap_range(pci_device_, pci_memory_, pci_memory_size_);
|
||||
if(err) warning("failed to unmap the pci memory. Error code: %d\n", err);
|
||||
}
|
||||
}
|
||||
|
||||
bool MMIO::RegisterWriteAPI(uint32_t reg_offset, uint32_t value){
|
||||
// access the mmap
|
||||
// write register offset to index register 0x38 of index/data pair (indirect addressing)
|
||||
// write register bits to data register 0x3c of index/data pair (indirect addressing)
|
||||
|
||||
// std::lock_guard<std::mutex> lock(mutex_);
|
||||
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 0);
|
||||
|
||||
// TODO: should work only if map is created
|
||||
|
||||
*index_reg_addr = reg_offset;
|
||||
*data_reg_addr = value;
|
||||
|
||||
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MMIO::RegisterReadAPI(uint32_t reg_offset, uint32_t& value){
|
||||
// access the mmap
|
||||
// write register offset to index register 0x38 of index/data pair (indirect addressing)
|
||||
// read register bits to data register 0x3c of index/data pair (indirect addressing)
|
||||
|
||||
// std::lock_guard<std::mutex> lock(mutex_);
|
||||
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 0);
|
||||
|
||||
// TODO: should work only if map is created
|
||||
|
||||
*index_reg_addr = reg_offset;
|
||||
// TODO: add delay here??
|
||||
value = *data_reg_addr;
|
||||
|
||||
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
|
||||
MMIO* mmio = nullptr;
|
||||
switch (type) {
|
||||
case PCIE_PERFMON: {
|
||||
mmio = GetMMIOInstance(type, info);
|
||||
if(mmio == nullptr){
|
||||
mmio = dynamic_cast<MMIO*>(new PciePerfmonMMIO(info));
|
||||
AddInstance(mmio);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DF_PERFMON: {
|
||||
break;
|
||||
}
|
||||
case UMC_PERFMON: {
|
||||
break;
|
||||
}
|
||||
case DEFAULT_MMAP: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return mmio;
|
||||
}
|
||||
|
||||
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info) {
|
||||
MMIO* mmio = nullptr;
|
||||
auto it = mmio_instances_.find(info.getHandle());
|
||||
if(it != mmio_instances_.end()){
|
||||
for(auto& mmio_instance: it->second){
|
||||
if(mmio_instance->Type() == type){
|
||||
mmio = mmio_instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mmio;
|
||||
}
|
||||
|
||||
void MMIOManager::AddInstance(MMIO* in_mmio_instance) {
|
||||
uint64_t handle = in_mmio_instance->GetAgentInfo().getHandle();
|
||||
mmio_instances_[handle].push_back(in_mmio_instance);
|
||||
}
|
||||
|
||||
void MMIOManager::DestroyMMIOInstance(MMIO* in_mmio_instance) {
|
||||
if(in_mmio_instance == nullptr)
|
||||
return;
|
||||
|
||||
uint64_t handle = in_mmio_instance->GetAgentInfo().getHandle();
|
||||
auto it = mmio_instances_.find(handle);
|
||||
if(it != mmio_instances_.end()){
|
||||
auto& mmio_array = it->second;
|
||||
// find instance in the array and remove it from the array
|
||||
mmio_array.erase(std::remove(mmio_array.begin(), mmio_array.end(), in_mmio_instance), mmio_array.end());
|
||||
}
|
||||
delete in_mmio_instance;
|
||||
}
|
||||
|
||||
|
||||
std::map<decltype(hsa_agent_t::handle), std::vector<MMIO*>> MMIOManager::mmio_instances_;
|
||||
|
||||
|
||||
} // namespace mmio
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
|
||||
@@ -0,0 +1,122 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_CORE_NON_GFXIP_COUNTERS_MMIO_H
|
||||
#define SRC_CORE_NON_GFXIP_COUNTERS_MMIO_H
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
|
||||
#include <pciaccess.h>
|
||||
#include <mutex>
|
||||
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <sstream>
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
namespace mmio {
|
||||
|
||||
#define FUNCTION_START() PrintFunctionPhase(__FUNCTION__, 0)
|
||||
#define FUNCTION_END() PrintFunctionPhase(__FUNCTION__, 1)
|
||||
|
||||
// uncomment below to see register write sequences
|
||||
// #define DEBUG_TRACE = 1
|
||||
|
||||
void PrintFunctionPhase(const char* function_name, int phase);
|
||||
void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char* function_name, int phase);
|
||||
|
||||
// Index/Data registers
|
||||
const static uint32_t INDIRECT_REG_INDEX = 0x38;
|
||||
const static uint32_t INDIRECT_REG_DATA = 0x3c;
|
||||
|
||||
typedef enum { DEFAULT_MMAP, DF_PERFMON, UMC_PERFMON, PCIE_PERFMON } mmap_type_t;
|
||||
|
||||
class MMIOManager;
|
||||
|
||||
class MMIO {
|
||||
public:
|
||||
virtual bool RegisterWriteAPI(uint32_t reg_offset, uint32_t value);
|
||||
virtual bool RegisterReadAPI(uint32_t reg_offset, uint32_t& value);
|
||||
virtual void SetIndexDataRegisters(const uint32_t index_reg, const uint32_t data_reg) {
|
||||
index_reg_addr = (uint32_t*)((char*)pci_memory_ + index_reg);
|
||||
data_reg_addr = (uint32_t*)((char*)pci_memory_ + data_reg);
|
||||
}
|
||||
|
||||
MMIO(MMIO& other) = delete;
|
||||
void operator=(const MMIO&) = delete;
|
||||
virtual ~MMIO();
|
||||
friend class MMIOManager;
|
||||
|
||||
const Agent::AgentInfo& GetAgentInfo() { return *agent_info_; }
|
||||
mmap_type_t Type() { return type_; }
|
||||
|
||||
protected:
|
||||
MMIO(const Agent::AgentInfo& info);
|
||||
|
||||
// default constructor; helpful for derived classes
|
||||
// which want to setup mmio construction differently
|
||||
MMIO(){ type_ = DEFAULT_MMAP; };
|
||||
|
||||
const Agent::AgentInfo* agent_info_;
|
||||
struct pci_device* pci_device_;
|
||||
size_t pci_memory_size_;
|
||||
uint32_t* pci_memory_;
|
||||
mmap_type_t type_;
|
||||
|
||||
uint32_t* index_reg_addr;
|
||||
uint32_t* data_reg_addr;
|
||||
};
|
||||
|
||||
// PciePerfmonMMIO has same mmio setup approach as
|
||||
// done in MMIO class
|
||||
class PciePerfmonMMIO : public MMIO{
|
||||
public:
|
||||
friend class MMIOManager;
|
||||
protected:
|
||||
PciePerfmonMMIO(const Agent::AgentInfo& info): MMIO(info) {
|
||||
type_ = PCIE_PERFMON;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
Class to manage mmio for UMC/DF/PCIe etc.
|
||||
The mmio approach for the different IPs may
|
||||
be same or different. For eg: UMC and PCIe share
|
||||
the same mmio and index/data registers
|
||||
*/
|
||||
class MMIOManager {
|
||||
public:
|
||||
static MMIO* CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info);
|
||||
static MMIO* GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info);
|
||||
static void DestroyMMIOInstance(MMIO* instance);
|
||||
private:
|
||||
static void AddInstance(MMIO* instance);
|
||||
static std::map<decltype(hsa_agent_t::handle), std::vector<MMIO*>> mmio_instances_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace mmio
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,116 @@
|
||||
#include "pcie_counters_mi200.h"
|
||||
#include "pcie_perfmon_registers_mi200.h"
|
||||
#include "perfmon.h"
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
PciePerfMonMI200::PciePerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
|
||||
mmio_ =
|
||||
dynamic_cast<mmio::PciePerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::PCIE_PERFMON, info));
|
||||
}
|
||||
|
||||
PciePerfMonMI200::~PciePerfMonMI200() {
|
||||
mmio::MMIOManager::DestroyMMIOInstance(dynamic_cast<mmio::MMIO*>(mmio_));
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::writeRegister(uint32_t reg_offset, uint32_t value){
|
||||
// mmio or ioctl approaches
|
||||
mmio_->RegisterWriteAPI(reg_offset, value);
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::readRegister(uint32_t reg_offset, uint32_t& value){
|
||||
// mmio or ioctl approaches
|
||||
mmio_->RegisterReadAPI(reg_offset, value);
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::SetCounterNames(std::vector<std::string>& counter_names) {
|
||||
counter_names_ = counter_names;
|
||||
// TODO: only one event at a time is supported at the moment
|
||||
auto it = PCIE_MI200::pcie_events_table.find(counter_names[0]);
|
||||
if (it != PCIE_MI200::pcie_events_table.end()) {
|
||||
PCIE_MI200::pcie_event_t event_desc = it->second;
|
||||
if (event_desc.event_category == PCIE_MI200::RX_TILE_SCLK) {
|
||||
event_id_ = event_desc.event_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Start(){
|
||||
// TODO: make sure values stored in table
|
||||
// in registers header are dec and not hex
|
||||
|
||||
Start_RX_TILE_SCLK(event_id_);
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Stop(){
|
||||
// TODO: revisit correct value to stop
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x2); // stop
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values){
|
||||
uint64_t val=0;
|
||||
Read_RX_TILE_SCLK(val);
|
||||
rocprofiler_counters_sampler_counter_output_t value = {
|
||||
ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS,
|
||||
static_cast<double>(val)
|
||||
};
|
||||
values.push_back(value);
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Start_RX_TILE_TXCLK(uint32_t event){
|
||||
|
||||
// Step 1: PORT SEL update
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_EVENT_CI_PORT_SEL, 0x0);
|
||||
|
||||
// Step 2: EVENT SEL update
|
||||
uint32_t value = event; // last 8 bits for event
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_TXCLK3, value);
|
||||
|
||||
// Steps 3 & 4: Performance counters initialization, enable:
|
||||
// TODO: revisit. Just a single write with 0x3 might be enough (check with pcie team)
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x5);
|
||||
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Read_RX_TILE_TXCLK(uint64_t& result){
|
||||
// Step 5: Performance counters read:
|
||||
uint32_t lo_val, hi_val;
|
||||
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_TXCLK3, lo_val);
|
||||
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_UPVAL_TXCLK3, hi_val);
|
||||
|
||||
// Combine the lo and hi values and put them in result
|
||||
uint64_t val = (hi_val & 0xFFFFUL);
|
||||
val = val << 32;
|
||||
result = val | lo_val;
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Start_RX_TILE_SCLK(uint32_t event){
|
||||
|
||||
// Step 1: PORT SEL update
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_EVENT_CI_PORT_SEL, 0x0);
|
||||
|
||||
// Step 2: EVENT SEL update
|
||||
uint32_t value = event; // last 8 bits for event
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_LCLK1, value);
|
||||
|
||||
// Steps 3 & 4: Performance counters initialization, enable:
|
||||
// TODO: revisit. Just a single write with 0x3 might be enough (check with pcie team)
|
||||
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x5);
|
||||
|
||||
}
|
||||
|
||||
void PciePerfMonMI200::Read_RX_TILE_SCLK(uint64_t& result){
|
||||
// Step 5: Performance counters read:
|
||||
uint32_t lo_val, hi_val;
|
||||
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_LCLK1, lo_val);
|
||||
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_UPVAL_LCLK1, hi_val);
|
||||
|
||||
// Combine the lo and hi values and put them in result
|
||||
uint64_t val = (hi_val & 0xFFFFUL);
|
||||
val = val << 32;
|
||||
result = val | lo_val;
|
||||
}
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
#ifndef PCIE_COUNTERS_MI200_H
|
||||
#define PCIE_COUNTERS_MI200_H
|
||||
|
||||
#include "mmio.h"
|
||||
#include "perfmon.h"
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
/*
|
||||
One perfmon per GPU.
|
||||
Only one instance per GPU, per process
|
||||
*/
|
||||
|
||||
class PciePerfMonMI200 : public PerfMon {
|
||||
public:
|
||||
PciePerfMonMI200(const Agent::AgentInfo& info);
|
||||
~PciePerfMonMI200();
|
||||
void SetCounterNames(std::vector<std::string>& counter_names) override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
void Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values) override;
|
||||
mmio::mmap_type_t Type() override { return mmio::mmap_type_t::PCIE_PERFMON; }
|
||||
|
||||
private:
|
||||
// TODO : check google coding std
|
||||
void writeRegister(uint32_t reg_offset, uint32_t value);
|
||||
void readRegister(uint32_t reg_offset, uint32_t& value);
|
||||
|
||||
void Start_RX_TILE_TXCLK(uint32_t event);
|
||||
void Read_RX_TILE_TXCLK(uint64_t& result);
|
||||
|
||||
void Start_RX_TILE_SCLK(uint32_t event);
|
||||
void Read_RX_TILE_SCLK(uint64_t& result);
|
||||
|
||||
private:
|
||||
mmio::PciePerfmonMMIO* mmio_;
|
||||
std::vector<std::string> counter_names_;
|
||||
int event_id_;
|
||||
};
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,327 @@
|
||||
#ifndef PCIE_PERFMON_REGISTERS_MI200_H
|
||||
#define PCIE_PERFMON_REGISTERS_MI200_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace PCIE_MI200 {
|
||||
|
||||
// -------- RX Tile TXCLK Start --------
|
||||
|
||||
// Step 1: PORT SEL update
|
||||
const static uint32_t PCIE_PERF_CNTL_EVENT_CI_PORT_SEL = 0x11180250;
|
||||
|
||||
// Step 2: EVENT SEL update
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK1 = 0x11180204;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK2 = 0x11180210;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK3 = 0x1118021C; //#
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK4 = 0x11180228; //#
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK5 = 0x11180258;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK6 = 0x11180264;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK7 = 0x11180888;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK8 = 0x11180894;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK9 = 0x111808A0;
|
||||
const static uint32_t PCIE_PERF_CNTL_TXCLK10 = 0x111808AC;
|
||||
|
||||
// Steps 3 & 4: Performance counters initialization, enable:
|
||||
const static uint32_t PCIE_PERF_COUNT_CNTL = 0x11180200;
|
||||
|
||||
// Step 5: Performance counters read:
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK1 = 0x11180208;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK2 = 0x11180214;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK3 = 0x11180220; //#
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK4 = 0x1118022C; //#
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK5 = 0x1118025C;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK6 = 0x11180268;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK7 = 0x1118088C;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK8 = 0x11180898;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK9 = 0x111808A4;
|
||||
const static uint32_t PCIE_PERF_COUNT0_TXCLK10 = 0x111808B0;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK1 = 0x111808E8;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK2 = 0x111808F0;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK3 = 0x111808F8; //#
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK4 = 0x11180900; //#
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK5 = 0x11180908;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK6 = 0x11180910;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK7 = 0x11180918;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK8 = 0x11180920;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK9 = 0x11180928;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK10 = 0x11180930;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK1 = 0x1118020C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK2 = 0x11180218;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK3 = 0x11180224; //#
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK4 = 0x11180230; //#
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK5 = 0x11180260;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK6 = 0x1118026C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK7 = 0x11180890;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK8 = 0x1118089C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK9 = 0x111808A8;
|
||||
const static uint32_t PCIE_PERF_COUNT1_TXCLK10 = 0x111808B4;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK1 = 0x111808EC;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK2 = 0x111808F4;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK3 = 0x111808FC; //#
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK4 = 0x11180904; //#
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK5 = 0x1118090C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK6 = 0x11180914;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK7 = 0x1118091C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK8 = 0x11180924;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK9 = 0x1118092C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK10 = 0x11180934;
|
||||
|
||||
|
||||
// -------- RX Tile TXCLK End --------
|
||||
|
||||
// -------- RX Tile SCLK Start --------
|
||||
|
||||
// Step 1: PORT SEL update
|
||||
// PCIE_PERF_CNTL_EVENT_CI_PORT_SEL
|
||||
|
||||
// Step 2: EVENT SEL update
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK1 = 0x11180234;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK2 = 0x11180240;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK3 = 0x11180270;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK4 = 0x1118027C;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK5 = 0x111808B8;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK6 = 0x111808C4;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK7 = 0x111808D0;
|
||||
const static uint32_t PCIE_PERF_CNTL_LCLK8 = 0x111808DC;
|
||||
|
||||
// Step 5: Performance counters read:
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK1 = 0x11180238;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK2 = 0x11180244;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK3 = 0x11180274;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK4 = 0x11180280;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK5 = 0x111808BC;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK6 = 0x111808C8;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK7 = 0x111808D4;
|
||||
const static uint32_t PCIE_PERF_COUNT0_LCLK8 = 0x111808E0;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK1 = 0x11180938;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK2 = 0x11180940;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK3 = 0x11180948;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK4 = 0x11180950;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK5 = 0x11180958;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK6 = 0x11180960;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK7 = 0x11180968;
|
||||
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK8 = 0x11180970;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK1 = 0x1118023C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK2 = 0x11180248;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK3 = 0x11180278;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK4 = 0x11180284;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK5 = 0x111808C0;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK6 = 0x111808CC;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK7 = 0x111808D8;
|
||||
const static uint32_t PCIE_PERF_COUNT1_LCLK8 = 0x111808E4;
|
||||
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK1 = 0x1118093C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK2 = 0x11180944;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK3 = 0x1118094C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK4 = 0x11180954;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK5 = 0x1118095C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK6 = 0x11180964;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK7 = 0x1118096C;
|
||||
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK8 = 0x11180974;
|
||||
|
||||
// -------- RX Tile SCLK End ----------
|
||||
|
||||
typedef enum{
|
||||
TX_TILE_TXCLK = 0,
|
||||
TX_TILE_SCLK = 1,
|
||||
RX_TILE_TXCLK = 2,
|
||||
RX_TILE_SCLK = 3,
|
||||
LC_TILE_TXCLK = 4
|
||||
}pcie_event_category_t;
|
||||
|
||||
struct pcie_event_t{
|
||||
pcie_event_t(int id, pcie_event_category_t cat): event_id(id), event_category(cat){}
|
||||
int event_id;
|
||||
pcie_event_category_t event_category;
|
||||
};
|
||||
|
||||
const static std::map<std::string, pcie_event_t> pcie_events_table = {
|
||||
{"RX_PERF_RXP_RX_TailEdb_A[0]", {2, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEdb_A[1]", {3, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEdb_A[2]", {4, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEdb_A[3]", {5, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEnd_A[0]", {6, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEnd_A[1]", {7, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEnd_A[2]", {8, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_TailEnd_A[3]", {9, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadSdp_A[0]", {10, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadSdp_A[1]", {11, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadSdp_A[2]", {12, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadSdp_A[3]", {13, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadStp_A[0]", {14, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadStp_A[1]", {15, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadStp_A[2]", {16, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXP_RX_HeadStp_A[3]", {17, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXCRC_nullified_tlp_A", {18, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXCRC_valid_crc_A", {19, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RXCRC_invalid_crc_A", {20, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_vendor_type1_A", {21, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_vendor_type0_A", {22, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_set_slot_power_limit_A", {23, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_unlock_A", {24, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_err_fatal_A", {25, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_err_nonfatal_A", {26, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_err_corr_A", {27, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_pme_to_ack_A", {28, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_pme_turn_off_A", {29, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_pm_pme_A", {30, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_pm_active_state_nak_A", {31, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_deassert_intd_A", {32, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_deassert_intc_A", {33, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_deassert_intb_A", {34, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_deassert_inta_A", {35, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_assert_intd_A", {36, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_assert_intc_A", {37, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_assert_intb_A", {38, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_assert_inta_A", {39, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_valid_A", {40, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RMSG_unsupported_A", {41, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RCB_unexpected_cpl_A", {42, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_RCB_timeout_cpl_A", {43, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_HDS_tlphdrvalid_A", {44, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_HDS_tlpdatavalid_A", {45, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_GAN_bad_tlp_A", {46, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_GAN_nak_A", {47, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_GAN_ack_A", {48, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_unsupported_req_A", {49, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_unsupported_cpl_A", {50, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_unexpected_cpl_A", {51, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_poisoned_tlp_A", {52, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_poisoned_cpl_A", {53, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_malformed_tlp_A", {54, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_cpl_abort_A", {55, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_MSG_A", {56, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_CFG_WR_A", {57, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_CFG_RD_A", {58, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_IO_WR_A", {59, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_IO_RD_A", {60, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_MEM_WR_A", {61, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_request_MEM_RD_A", {62, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_MST_gt16_A", {63, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_MST_9to16_A", {64, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_MST_5to8_A", {65, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_MST_2to4_A", {66, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_MST_1_A", {67, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_gt32_A", {68, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_17to32_A", {69, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_9to16_A", {70, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_5to8_A", {71, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_2to4_A", {72, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_length_SLV_1_A", {73, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_cpl_status_CA_A", {74, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_cpl_status_CRS_A", {75, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_cpl_status_UR_A", {76, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_FE_cpl_status_SC_A", {77, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_pm_active_state_request_l1_A", {78, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_pm_request_ack_A", {79, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_pm_enter_l23_A", {80, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_pm_enter_l1_A", {81, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_error_A", {82, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_crc_err_A", {83, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_FCC_npd_0", {84, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_FCC_pd_0", {85, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_FCC_nph_0", {86, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_FCC_ph_0", {87, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_fail_crc_rd_hdr_0", {88, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_pass_crc_rd_hdr_0", {89, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_fail_crc_wr_hdr_0", {90, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_pass_crc_wr_hdr_0", {91, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_fail_crc_data_0", {92, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_pass_crc_data_0", {93, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_invalid_crc_0", {94, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_valid_crc_0", {95, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_rd_hdr_WEN_0", {96, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_wr_hdr_WEN_0", {97, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_data_WEN_0", {98, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_non_post_rd_from_FE", {99, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_non_post_wr_from_FE", {100, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_post_req_from_FE", {101, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_non_post_rd_from_FE_0", {102, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_non_post_wr_from_FE_0", {103, RX_TILE_TXCLK}},
|
||||
{"SB_PERF_post_req_from_FE_0", {104, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_nak_A", {111, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_DLLP_ack_A", {112, RX_TILE_TXCLK}},
|
||||
{"RX_PERF_allErrors_A", {113, RX_TILE_TXCLK}},
|
||||
{"perf_PG_COUNT", {175, RX_TILE_TXCLK}},
|
||||
{"perf_NOT_POWER_GATED", {176, RX_TILE_TXCLK}},
|
||||
{"perf_POWER_GATED", {177, RX_TILE_TXCLK}},
|
||||
|
||||
{"SB_PERF_non_post_rd_to_HI", {2, RX_TILE_SCLK}},
|
||||
{"SB_PERF_non_post_wr_to_HI", {3, RX_TILE_SCLK}},
|
||||
{"SB_PERF_post_req_to_HI", {4, RX_TILE_SCLK}},
|
||||
{"SB_PERF_non_post_rd_to_HI_0", {5, RX_TILE_SCLK}},
|
||||
{"SB_PERF_non_post_wr_to_HI_0", {6, RX_TILE_SCLK}},
|
||||
{"SB_PERF_post_req_to_HI_0", {7, RX_TILE_SCLK}},
|
||||
{"SB_PERF_rd_hdr_REN_0", {8, RX_TILE_SCLK}},
|
||||
{"SB_PERF_wr_hdr_REN_0", {9, RX_TILE_SCLK}},
|
||||
{"SB_PERF_data_REN_0", {10, RX_TILE_SCLK}},
|
||||
{"SB_PERF_rd_hdr_empty_0", {11, RX_TILE_SCLK}},
|
||||
{"SB_PERF_wr_hdr_empty_0", {12, RX_TILE_SCLK}},
|
||||
{"SB_PERF_data_empty_0", {13, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total128BRdCpl", {29, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total32BMemRdTx", {30, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total64BMemRdTx", {31, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total16BMemWrTx", {32, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total32BMemWrTx", {33, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_total64BMemWrTx", {34, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_totalTx", {35, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_stallGrantGen", {36, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_totalGrant", {37, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_txPending", {38, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_numMemRdLT32B", {39, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_numMemRdLT16B", {40, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_totalMemTx", {41, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_totalMemRdTx", {42, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_totalMemWrTx", {43, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_numGrant0", {44, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntOverFlow_ns0", {45, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntUnderFlow_ns0", {46, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntOverFlow_s0", {47, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntUnderFlow_s0", {48, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntOverFlow0", {49, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_portCntUnderFlow0", {50, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_npNotAccepted_ns0", {51, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_npNotAccepted_s0", {52, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num128BRdCpl0", {53, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num32BMemRdTx0", {54, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num64BMemRdTx0", {55, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num16BMemWrTx0", {56, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num32BMemWrTx0", {57, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_num64BMemWrTx0", {58, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemRd_Bandwidth0", {59, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemWr_Bandwidth0", {60, RX_TILE_SCLK}},
|
||||
{"TX_PERF_S_RCLK_s_tag_buf_empty", {61, RX_TILE_SCLK}},
|
||||
{"P_request_latency_500ns_or_more", {62, RX_TILE_SCLK}},
|
||||
{"P_request_latency_250_to_500ns", {63, RX_TILE_SCLK}},
|
||||
{"P_request_latency_100_to_250ns", {64, RX_TILE_SCLK}},
|
||||
{"P_request_latency_100ns_or_less", {65, RX_TILE_SCLK}},
|
||||
{"NP_request_latency_500ns_or_more", {66, RX_TILE_SCLK}},
|
||||
{"NP_request_latency_250_to_500ns", {67, RX_TILE_SCLK}},
|
||||
{"NP_request_latency_100_to_250ns", {68, RX_TILE_SCLK}},
|
||||
{"NP_request_latency_100ns_or_less", {69, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemRd_wait_for_cpl_slot[0]", {70, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemRd_wait_for_tag[0]", {71, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemRd_wait_for_d_credit[0]", {72, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemRd_wait_for_h_credit[0]", {73, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemWr_wait_for_tag[0]", {74, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemWr_wait_for_d_credit[0]", {75, RX_TILE_SCLK}},
|
||||
{"CI_PERF_slv_MemWr_wait_for_h_credit[0]", {76, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC1_no_tags_q", {77, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC1_data_credits_q", {78, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC1_req_credits_q", {79, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_cpl_slots_q[0]", {80, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC0_no_tags_q", {81, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC0_data_credits_q", {82, RX_TILE_SCLK}},
|
||||
{"CISLV_PERF_no_VC0_req_credits_q", {83, RX_TILE_SCLK}}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_CORE_COUNTERS_PERFMON_H
|
||||
#define SRC_CORE_COUNTERS_PERFMON_H
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "mmio.h"
|
||||
#include <vector>
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
class PerfMon {
|
||||
public:
|
||||
virtual ~PerfMon(){};
|
||||
virtual void Start() = 0;
|
||||
virtual void Stop() = 0;
|
||||
virtual void Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values) = 0;
|
||||
virtual void SetCounterNames(std::vector<std::string>& counter_names) {
|
||||
counter_names_ = counter_names;
|
||||
};
|
||||
virtual mmio::mmap_type_t Type() = 0;
|
||||
|
||||
protected:
|
||||
std::vector<std::string> counter_names_;
|
||||
};
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
#endif
|
||||
@@ -220,11 +220,18 @@ bool GetNextRecord(const rocprofiler_record_header_t* record,
|
||||
*next = reinterpret_cast<const rocprofiler_record_header_t*>(att_tracer_record + 1);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_COUNTERS_SAMPLER_RECORD: {
|
||||
const rocprofiler_record_counters_sampler_t* sampler_record =
|
||||
reinterpret_cast<const rocprofiler_record_counters_sampler_t*>(record);
|
||||
*next = reinterpret_cast<const rocprofiler_record_header_t*>(sampler_record + 1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
const rocprofiler_record_tracer_t* tracer_record =
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(record);
|
||||
*next = reinterpret_cast<const rocprofiler_record_header_t*>(tracer_record + 1);
|
||||
// size_to_add = sizeof(rocprofiler_record_header_t);
|
||||
break;
|
||||
}
|
||||
// const std::byte* ptr = reinterpret_cast<const std::byte*>(record);
|
||||
// ptr += size_to_add;
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "counters_sampler.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include "src/api/rocmtool.h"
|
||||
#include "src/core/counters/mmio/pcie_counters_mi200.h"
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
CountersSampler::CountersSampler(
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_filter_id_t filter_id,
|
||||
rocprofiler_session_id_t session_id)
|
||||
: buffer_id_(buffer_id)
|
||||
, filter_id_(filter_id)
|
||||
, session_id_(session_id)
|
||||
, pci_system_initialized_(pci_system_init() == 0)
|
||||
|
||||
{
|
||||
params_ = rocmtools::GetROCMToolObj()
|
||||
->GetSession(session_id_)
|
||||
->GetFilter(filter_id_)
|
||||
->GetCountersSamplerParameterData();
|
||||
|
||||
std::vector<hsa_agent_t> agents;
|
||||
rocmtools::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* arg) {
|
||||
auto& agents = *reinterpret_cast<std::vector<hsa_agent_t>*>(arg);
|
||||
const auto& ai = rocmtools::hsa_support::GetAgentInfo(agent.handle);
|
||||
if (ai.getType() == HSA_DEVICE_TYPE_GPU) {
|
||||
agents.emplace_back(agent);
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
|
||||
// create perfmon instances for the counter types specfied
|
||||
|
||||
// PCIE counters
|
||||
std::vector<std::string> pcie_counter_names;
|
||||
for (int i = 0; i < params_.counters_num; i++) {
|
||||
if(params_.counters[i].type == ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS)
|
||||
pcie_counter_names.push_back(params_.counters[i].name);
|
||||
}
|
||||
|
||||
if (pcie_counter_names.size() > 0) {
|
||||
auto agentInfo = rocmtools::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
|
||||
if(agentInfo.getName() == "gfx90a"){
|
||||
PciePerfMonMI200* perfmon = new PciePerfMonMI200(agentInfo);
|
||||
perfmon->SetCounterNames(pcie_counter_names);
|
||||
perfmon_instances_.push_back(perfmon);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CountersSampler::~CountersSampler() {
|
||||
// cleanup perfmon instancess
|
||||
for (auto &perfmon : perfmon_instances_) {
|
||||
if(perfmon != nullptr)
|
||||
delete perfmon;
|
||||
}
|
||||
// clean up libpcieaccess resources
|
||||
// TODO: should be part of mmio class in future
|
||||
if (pci_system_initialized_) {
|
||||
pci_system_cleanup();
|
||||
pci_system_initialized_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void CountersSampler::Start() {
|
||||
if (sampler_thread_.joinable()) { return; }
|
||||
|
||||
std::cout << "Sampler Start\n";
|
||||
// Start all Perfmons
|
||||
for (auto& perfmon : perfmon_instances_) {
|
||||
perfmon->Start();
|
||||
}
|
||||
|
||||
// Start polling thread
|
||||
keep_running_ = true;
|
||||
sampler_thread_ = std::thread([this]() { SamplerLoop(); });
|
||||
}
|
||||
|
||||
void CountersSampler::Stop() {
|
||||
if (!sampler_thread_.joinable()) { return; }
|
||||
|
||||
std::cout << "Sampler Stop\n";
|
||||
// Stop all Perfmons
|
||||
for (auto& perfmon : perfmon_instances_) {
|
||||
perfmon->Stop();
|
||||
}
|
||||
|
||||
// Stop polling thread
|
||||
keep_running_ = false;
|
||||
sampler_thread_.join();
|
||||
}
|
||||
|
||||
void CountersSampler::AddRecord(rocprofiler_record_counters_sampler_t &record) {
|
||||
const auto tool = rocmtools::GetROCMToolObj();
|
||||
const auto session = tool->GetSession(session_id_);
|
||||
const auto buffer = session->GetBuffer(buffer_id_);
|
||||
|
||||
std::lock_guard<std::mutex> lk(session->GetSessionLock());
|
||||
|
||||
record.header = {
|
||||
ROCPROFILER_COUNTERS_SAMPLER_RECORD,
|
||||
{ tool->GetUniqueRecordId() }
|
||||
};
|
||||
|
||||
// Add the record to the buffer(a deep-copy operation) along with
|
||||
// a lambda function to deep-copy the record.counters member to
|
||||
// the newly created buffer record
|
||||
buffer->AddRecord(
|
||||
record, record.counters,
|
||||
(record.num_counters * (sizeof(rocprofiler_counters_sampler_counter_output_t) + 1)),
|
||||
[](auto& buff_record, const void* data) {
|
||||
buff_record.counters = const_cast<rocprofiler_counters_sampler_counter_output_t*>(
|
||||
static_cast<const rocprofiler_counters_sampler_counter_output_t*>(data));
|
||||
});
|
||||
}
|
||||
|
||||
void CountersSampler::SamplerLoop() {
|
||||
std::this_thread::sleep_until(std::chrono::steady_clock::now() +
|
||||
std::chrono::milliseconds(params_.initial_delay));
|
||||
uint32_t elapsed = 0;
|
||||
while (keep_running_ && (elapsed <= params_.sampling_duration)) {
|
||||
auto next_tick =
|
||||
std::chrono::steady_clock::now() + std::chrono::milliseconds(params_.sampling_rate);
|
||||
|
||||
rocprofiler_record_counters_sampler_t record;
|
||||
std::vector<rocprofiler_counters_sampler_counter_output_t> values;
|
||||
for (auto& perfmon : perfmon_instances_){
|
||||
perfmon->Read(values);
|
||||
}
|
||||
record.counters =
|
||||
static_cast<rocprofiler_counters_sampler_counter_output_t*>(
|
||||
malloc(values.size() * sizeof(rocprofiler_counters_sampler_counter_output_t)));
|
||||
::memcpy(record.counters, &(values)[0],
|
||||
values.size() * sizeof(rocprofiler_counters_sampler_counter_output_t));
|
||||
record.num_counters = values.size();
|
||||
rocprofiler_counters_sampler_counter_output_t* record_counters = record.counters;
|
||||
AddRecord(record);
|
||||
free(record_counters);
|
||||
|
||||
std::this_thread::sleep_until(next_tick);
|
||||
elapsed += params_.sampling_rate;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocmtools
|
||||
@@ -0,0 +1,62 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_CORE_SESSION_COUNTERS_SAMPLER_H_
|
||||
#define SRC_CORE_SESSION_COUNTERS_SAMPLER_H_
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "src/core/counters/mmio/perfmon.h"
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
class CountersSampler {
|
||||
public:
|
||||
CountersSampler(rocprofiler_buffer_id_t buffer_id, rocprofiler_filter_id_t filter_id,
|
||||
rocprofiler_session_id_t session_id);
|
||||
~CountersSampler();
|
||||
|
||||
CountersSampler(const CountersSampler&) = delete;
|
||||
CountersSampler& operator=(const CountersSampler&) = delete;
|
||||
|
||||
void Start();
|
||||
void Stop();
|
||||
void AddRecord(rocprofiler_record_counters_sampler_t& record);
|
||||
|
||||
private:
|
||||
void SamplerLoop();
|
||||
|
||||
rocprofiler_buffer_id_t buffer_id_;
|
||||
rocprofiler_filter_id_t filter_id_;
|
||||
rocprofiler_session_id_t session_id_;
|
||||
bool pci_system_initialized_{false};
|
||||
rocprofiler_counters_sampler_parameters_t params_;
|
||||
std::vector<PerfMon*> perfmon_instances_;
|
||||
|
||||
std::atomic<bool> keep_running_{false};
|
||||
std::thread sampler_thread_;
|
||||
};
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
#endif
|
||||
@@ -67,10 +67,15 @@ Filter::Filter(rocprofiler_filter_id_t id, rocprofiler_filter_kind_t filter_kind
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_COUNTERS_SAMPLER: {
|
||||
counters_sampler_parameters_ = filter_data.counters_sampler_parameters;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
warning(
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"profiler mode!\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -122,6 +127,16 @@ rocprofiler_spm_parameter_t* Filter::GetSpmParameterData() {
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"SPM collection mode!\n");
|
||||
}
|
||||
|
||||
rocprofiler_counters_sampler_parameters_t Filter::GetCountersSamplerParameterData() {
|
||||
if (kind_ == ROCPROFILER_COUNTERS_SAMPLER) {
|
||||
return counters_sampler_parameters_;
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Counters sampler mode!\n");
|
||||
}
|
||||
|
||||
void Filter::SetProperty(rocprofiler_filter_property_t property) {
|
||||
switch (property.kind) {
|
||||
case ROCPROFILER_FILTER_HSA_TRACER_API_FUNCTIONS: {
|
||||
|
||||
@@ -56,6 +56,7 @@ class Filter {
|
||||
rocprofiler_filter_property_kind_t kind);
|
||||
size_t GetPropertiesCount(rocprofiler_filter_property_kind_t kind);
|
||||
rocprofiler_spm_parameter_t* GetSpmParameterData();
|
||||
rocprofiler_counters_sampler_parameters_t GetCountersSamplerParameterData();
|
||||
|
||||
private:
|
||||
rocprofiler_filter_id_t id_;
|
||||
@@ -72,6 +73,7 @@ class Filter {
|
||||
std::vector<rocprofiler_tracer_activity_domain_t> tracer_apis_; // ROCTX/HIP/HSA API
|
||||
rocprofiler_spm_parameter_t* spm_parameter_; // spm parameter
|
||||
std::vector<rocprofiler_att_parameter_t> att_parameters_; // ATT Parameters
|
||||
rocprofiler_counters_sampler_parameters_t counters_sampler_parameters_; // sampled counters parameters
|
||||
|
||||
rocprofiler_sync_callback_t callback_;
|
||||
};
|
||||
|
||||
@@ -148,6 +148,16 @@ void Session::Start() {
|
||||
pc_sampler_->Start();
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
|
||||
if (!counters_sampler_started_.load(std::memory_order_release)) {
|
||||
counters_sampler_ = new CountersSampler(
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetBufferId(),
|
||||
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetId(), session_id_);
|
||||
counters_sampler_started_.exchange(true, std::memory_order_release);
|
||||
}
|
||||
counters_sampler_->Start();
|
||||
}
|
||||
|
||||
is_active_ = true;
|
||||
if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) startSpm();
|
||||
}
|
||||
@@ -179,6 +189,14 @@ void Session::Terminate() {
|
||||
}
|
||||
}
|
||||
|
||||
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
|
||||
if (counters_sampler_started_.load(std::memory_order_release)) {
|
||||
counters_sampler_->Stop();
|
||||
delete counters_sampler_;
|
||||
counters_sampler_started_.exchange(false, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
is_active_ = false;
|
||||
}
|
||||
}
|
||||
@@ -191,6 +209,7 @@ att::AttTracer* Session::GetAttTracer() { return att_tracer_; }
|
||||
tracer::Tracer* Session::GetTracer() { return tracer_; }
|
||||
spm::SpmCounters* Session::GetSpmCounter() { return spmcounter_; }
|
||||
pc_sampler::PCSampler* Session::GetPCSampler() { return pc_sampler_; }
|
||||
CountersSampler* Session::GetCountersSampler() { return counters_sampler_; }
|
||||
|
||||
rocprofiler_filter_id_t Session::CreateFilter(rocprofiler_filter_kind_t filter_kind,
|
||||
rocprofiler_filter_data_t filter_data,
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "att/att.h"
|
||||
#include "spm/spm.h"
|
||||
#include "src/pcsampler/session/pc_sampler.h"
|
||||
#include "counters_sampler.h"
|
||||
|
||||
#define ASSERTM(exp, msg) assert(((void)msg, exp))
|
||||
|
||||
@@ -62,6 +63,7 @@ class Session {
|
||||
att::AttTracer* GetAttTracer();
|
||||
spm::SpmCounters* GetSpmCounter();
|
||||
pc_sampler::PCSampler* GetPCSampler();
|
||||
CountersSampler* GetCountersSampler();
|
||||
|
||||
// Filter
|
||||
rocprofiler_filter_id_t CreateFilter(rocprofiler_filter_kind_t filter_kind,
|
||||
@@ -117,6 +119,9 @@ class Session {
|
||||
std::atomic<bool> pc_sampler_started_{false};
|
||||
pc_sampler::PCSampler* pc_sampler_;
|
||||
|
||||
std::atomic<bool> counters_sampler_started_{false};
|
||||
CountersSampler* counters_sampler_;
|
||||
|
||||
std::atomic<uint64_t> buffers_counter_{1};
|
||||
std::mutex buffers_lock_;
|
||||
std::map<uint64_t, Memory::GenericBuffer*> buffers_;
|
||||
|
||||
@@ -23,6 +23,7 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
@@ -57,6 +58,7 @@ file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
|
||||
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
|
||||
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
|
||||
@@ -70,6 +72,7 @@ add_executable(runCoreUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/gtests_main.cpp
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_HW_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${ROCPROFILER_SRC_FILES}
|
||||
@@ -81,6 +84,7 @@ add_executable(runCoreUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/gtests_main.cpp
|
||||
${ROCPROFILER_TRACER_SRC_FILES}
|
||||
${ROCPROFILER_ROCTRACER_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_PC_SAMPLING_FILES}
|
||||
${OLD_LIB_SRC})
|
||||
|
||||
@@ -13,6 +13,7 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
@@ -52,6 +53,7 @@ file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/rocmtools/tools/t
|
||||
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
|
||||
add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/api/rocmtool_gtest.cpp
|
||||
@@ -59,6 +61,7 @@ add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_HW_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${ROCPROFILER_SPM_SRC_FILES}
|
||||
@@ -70,6 +73,7 @@ add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest
|
||||
${ROCPROFILER_TRACER_SRC_FILES}
|
||||
${ROCPROFILER_ROCTRACER_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_PC_SAMPLING_FILES})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user