SWDEV-297574: Support for pcie counters

Change-Id: I4a662c43a9d0cf883f336574baa09fc33b78b9af


[ROCm/rocprofiler commit: 3a639543e7]
This commit is contained in:
Saurabh Verma
2023-02-10 22:06:07 +00:00
committed by Ammar ELWazir
parent ff80dd4dfa
commit ef8eb4fef9
20 changed files with 1342 additions and 3 deletions
+77 -2
View File
@@ -479,7 +479,11 @@ typedef enum {
/**
* Represents SPM records
*/
ROCPROFILER_SPM_RECORD = 4
ROCPROFILER_SPM_RECORD = 4,
/**
* Represents Counters sampler records
*/
ROCPROFILER_COUNTERS_SAMPLER_RECORD = 5
} rocprofiler_record_kind_t;
/**
@@ -1789,7 +1793,11 @@ typedef enum {
/**
* HIP/HSA/ROCTX/SYS Trace.
*/
ROCPROFILER_API_TRACE = 6
ROCPROFILER_API_TRACE = 6,
/**
* Sampled Counters
*/
ROCPROFILER_COUNTERS_SAMPLER = 7
} rocprofiler_filter_kind_t;
/**
@@ -1895,6 +1903,69 @@ typedef struct {
} rocprofiler_spm_parameter_t;
typedef enum{
ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS = 0
} rocprofiler_counters_sampler_counter_type_t;
typedef struct{
char* name;
rocprofiler_counters_sampler_counter_type_t type;
} rocprofiler_counters_sampler_counter_input_t;
typedef struct{
rocprofiler_counters_sampler_counter_type_t type;
rocprofiler_record_counter_value_t value;
} rocprofiler_counters_sampler_counter_output_t;
typedef struct{
/**
* Counters to profile
*/
rocprofiler_counters_sampler_counter_input_t* counters;
/**
* Counters count
*/
int counters_num;
/**
* Sampling rate (ms)
*/
uint32_t sampling_rate;
/**
* Total sampling duration (ms); time between sampling start/stop
*/
uint32_t sampling_duration;
/**
* Initial delay (ms)
*/
uint32_t initial_delay;
/**
* Preferred agents to collect counters from
*/
int gpu_agent_index;
}rocprofiler_counters_sampler_parameters_t;
typedef struct{
/**
* ROCMtool General Record base header to identify the id and kind of every
* record
*/
rocprofiler_record_header_t header;
/**
* Agent Identifier to be used by the user to get the Agent Information using
* ::rocprofiler_query_agent_info
*/
rocprofiler_agent_id_t gpu_id;
/**
* Counters, including identifiers to get counter information and Counters
* values
*/
rocprofiler_counters_sampler_counter_output_t* counters;
/**
* Number of counter values
*/
uint32_t num_counters;
}rocprofiler_record_counters_sampler_t;
/**
* Filter Kind Data
*/
@@ -1915,6 +1986,10 @@ typedef union {
* spm counters parameters
*/
rocprofiler_spm_parameter_t* spm_parameters;
/**
* sampled counters parameters
*/
rocprofiler_counters_sampler_parameters_t counters_sampler_parameters;
} rocprofiler_filter_data_t;
/**
@@ -84,6 +84,16 @@ target_link_options(profiler_device_profiling PRIVATE "-Wl,--build-id=md5")
add_dependencies(samples profiler_device_profiling)
install(TARGETS profiler_device_profiling RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
## Build Counters Sampling example
set_source_files_properties(counters_sampler/pcie_counters_example.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(pcie_counters_sampler counters_sampler/pcie_counters_example.cpp ${ROCPROFILER_UTIL_SRC_FILES})
target_include_directories(pcie_counters_sampler PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/common)
target_link_libraries(pcie_counters_sampler PRIVATE ${ROCPROFILER_TARGET} systemd amd_comgr)
add_dependencies(samples pcie_counters_sampler)
install(TARGETS pcie_counters_sampler RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples)
# ################################################################################################################
# ############################################################################################################################################
# Tracer Samples
# ############################################################################################################################################
+18 -1
View File
@@ -285,6 +285,16 @@ void FlushPCSamplingRecord(
<< std::endl;
}
void FlushCountersSamplerRecord(
const rocprofiler_record_counters_sampler_t *counters_sampler_record) {
for (uint32_t i = 0; i < counters_sampler_record->num_counters; i++) {
output_file << ",Counter_" << i << "("
<< std::to_string(counters_sampler_record->counters[i].value.value) << ")"
<< std::endl;
}
output_file << std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
@@ -308,9 +318,16 @@ int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofil
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default: {
case ROCPROFILER_COUNTERS_SAMPLER_RECORD: {
const rocprofiler_record_counters_sampler_t *counters_sampler_record =
reinterpret_cast<const rocprofiler_record_counters_sampler_t *>(begin);
FlushCountersSamplerRecord(counters_sampler_record);
break;
}
default: {
std::cout <<"unknown record\n";
break;
}
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
@@ -0,0 +1,103 @@
#include "../common/common.h"
int main(int argc, char** argv) {
int* gpuMem;
int counter_option = 0;
std::vector<std::string> pcie_counters = {
"CI_PERF_slv_MemRd_Bandwidth0", "CI_PERF_slv_MemWr_Bandwidth0", "CI_PERF_slv_totalMemRdTx",
"CI_PERF_slv_totalMemWrTx", "CI_PERF_slv_totalTx"};
if(argc > 1) {
counter_option = atoi(argv[1]);
}
else{
std::cout<< "Please provide one of the counter index options as argument:\n";
for(int i = 0; i < pcie_counters.size(); i++){
std::cout<< "[" << i << "]: " << pcie_counters[i] << std::endl;
}
std::cout<< "Example:\n ./pcie_counters_sampler 1\n";
exit(0);
}
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x999999, &buffer_id));
// Counters Sampler Filter
rocprofiler_filter_id_t filter_id;
[[maybe_unused]] rocprofiler_filter_property_t property = {};
rocprofiler_counters_sampler_counter_input_t counters_input[2] = {
{.name = const_cast<char*>(pcie_counters[counter_option].c_str()),
.type = ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS}};
uint32_t rate = 1000;
uint32_t duration = 5000;
rocprofiler_counters_sampler_parameters_t cs_parameters = {.counters = counters_input,
.counters_num = 1,
.sampling_rate = rate,
.sampling_duration = duration,
.gpu_agent_index = 0};
CHECK_ROCPROFILER(
rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_SAMPLER,
rocprofiler_filter_data_t{.counters_sampler_parameters = cs_parameters},
0, &filter_id, property));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
// Normal HIP Calls
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be profiled
kernelCalls('A');
kernelCalls('B');
std::cout << "Collecting samples for: " << pcie_counters[counter_option]
<< " ; sampling rate: " << rate << " ms; duration: " << duration << " ms" << std::endl;
// Activating the session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls
HIP_CALL(hipFree(gpuMem));
// allow sampler to run for 10 secs
sleep(6);
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
return 0;
}
@@ -174,10 +174,12 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
@@ -198,8 +200,10 @@ add_library(${ROCPROFILER_TARGET} SHARED
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${CORE_HSA_PACKETS_SRC_FILES}
${CORE_HSA_QUEUES_SRC_FILES}
@@ -0,0 +1,189 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "mmio.h"
#include <cstdint>
#include "../../../utils/helper.h"
#include "pcie_perfmon_registers_mi200.h"
namespace rocmtools {
namespace mmio {
void PrintFunctionPhase(const char* function_name, int phase){
#if defined(DEBUG_TRACE)
if (phase == 0)
std::clog << function_name << "() START" << std::endl;
else
std::clog << function_name << "() END" << std::endl;
#endif
}
void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char* function_name, int phase){
#if defined(DEBUG_TRACE)
if (phase == 0) {
PrintFunctionPhase(function_name, phase);
std::clog << "Old (index, data) : "<< std::hex << index_value << " " << data_value << std::endl;
}
else {
std::clog << "New (index, data) : "<< std::hex << index_value << " " << data_value << std::endl;
PrintFunctionPhase(function_name, phase);
}
#endif
}
MMIO::MMIO(const Agent::AgentInfo& info) : agent_info_(&info), pci_memory_(nullptr), type_(DEFAULT_MMAP) {
const auto pci_domain = agent_info_->getPCIDomain();
const auto pci_location_id = agent_info_->getPCILocationID();
pci_device_ =
pci_device_find_by_slot(pci_domain, pci_location_id >> 8, pci_location_id & 0xFF, 0);
if (!pci_device_ || pci_device_probe(pci_device_)) fatal("failed to probe the GPU device\n");
// Look for a region between 256KB and 4096KB, 32-bit, non IO, and non prefetchable.
for (size_t region = 0; region < sizeof(pci_device::regions) / sizeof(pci_device::regions[0]);
++region)
if (pci_device_->regions[region].is_64 == 0 &&
pci_device_->regions[region].is_prefetchable == 0 &&
pci_device_->regions[region].is_IO == 0 &&
pci_device_->regions[region].size >= (256UL * 1024) &&
pci_device_->regions[region].size <= (4096UL * 1024)) {
pci_memory_size_ = pci_device_->regions[region].size;
int err = pci_device_map_range(pci_device_, pci_device_->regions[region].base_addr,
pci_device_->regions[region].size, PCI_DEV_MAP_FLAG_WRITABLE,
(void**)&pci_memory_);
if(err)
fatal("failed to map the registers. Error code: %d\n", err);
}
if (pci_memory_ == nullptr) fatal("could not find the pci memory address\n");
SetIndexDataRegisters(INDIRECT_REG_INDEX, INDIRECT_REG_DATA);
}
MMIO::~MMIO() {
if (pci_memory_)
{
int err = pci_device_unmap_range(pci_device_, pci_memory_, pci_memory_size_);
if(err) warning("failed to unmap the pci memory. Error code: %d\n", err);
}
}
bool MMIO::RegisterWriteAPI(uint32_t reg_offset, uint32_t value){
// access the mmap
// write register offset to index register 0x38 of index/data pair (indirect addressing)
// write register bits to data register 0x3c of index/data pair (indirect addressing)
// std::lock_guard<std::mutex> lock(mutex_);
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 0);
// TODO: should work only if map is created
*index_reg_addr = reg_offset;
*data_reg_addr = value;
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 1);
return true;
}
bool MMIO::RegisterReadAPI(uint32_t reg_offset, uint32_t& value){
// access the mmap
// write register offset to index register 0x38 of index/data pair (indirect addressing)
// read register bits to data register 0x3c of index/data pair (indirect addressing)
// std::lock_guard<std::mutex> lock(mutex_);
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 0);
// TODO: should work only if map is created
*index_reg_addr = reg_offset;
// TODO: add delay here??
value = *data_reg_addr;
PrintRegisterData(*index_reg_addr, *data_reg_addr, __FUNCTION__, 1);
return true;
}
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
MMIO* mmio = nullptr;
switch (type) {
case PCIE_PERFMON: {
mmio = GetMMIOInstance(type, info);
if(mmio == nullptr){
mmio = dynamic_cast<MMIO*>(new PciePerfmonMMIO(info));
AddInstance(mmio);
}
break;
}
case DF_PERFMON: {
break;
}
case UMC_PERFMON: {
break;
}
case DEFAULT_MMAP: {
break;
}
}
return mmio;
}
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info) {
MMIO* mmio = nullptr;
auto it = mmio_instances_.find(info.getHandle());
if(it != mmio_instances_.end()){
for(auto& mmio_instance: it->second){
if(mmio_instance->Type() == type){
mmio = mmio_instance;
}
}
}
return mmio;
}
void MMIOManager::AddInstance(MMIO* in_mmio_instance) {
uint64_t handle = in_mmio_instance->GetAgentInfo().getHandle();
mmio_instances_[handle].push_back(in_mmio_instance);
}
void MMIOManager::DestroyMMIOInstance(MMIO* in_mmio_instance) {
if(in_mmio_instance == nullptr)
return;
uint64_t handle = in_mmio_instance->GetAgentInfo().getHandle();
auto it = mmio_instances_.find(handle);
if(it != mmio_instances_.end()){
auto& mmio_array = it->second;
// find instance in the array and remove it from the array
mmio_array.erase(std::remove(mmio_array.begin(), mmio_array.end(), in_mmio_instance), mmio_array.end());
}
delete in_mmio_instance;
}
std::map<decltype(hsa_agent_t::handle), std::vector<MMIO*>> MMIOManager::mmio_instances_;
} // namespace mmio
} // namespace rocmtools
@@ -0,0 +1,122 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_CORE_NON_GFXIP_COUNTERS_MMIO_H
#define SRC_CORE_NON_GFXIP_COUNTERS_MMIO_H
#include <hsa/hsa.h>
#include "src/core/hardware/hsa_info.h"
#include <pciaccess.h>
#include <mutex>
#include <iostream>
#include <unistd.h>
#include <sstream>
namespace rocmtools {
namespace mmio {
#define FUNCTION_START() PrintFunctionPhase(__FUNCTION__, 0)
#define FUNCTION_END() PrintFunctionPhase(__FUNCTION__, 1)
// uncomment below to see register write sequences
// #define DEBUG_TRACE = 1
void PrintFunctionPhase(const char* function_name, int phase);
void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char* function_name, int phase);
// Index/Data registers
const static uint32_t INDIRECT_REG_INDEX = 0x38;
const static uint32_t INDIRECT_REG_DATA = 0x3c;
typedef enum { DEFAULT_MMAP, DF_PERFMON, UMC_PERFMON, PCIE_PERFMON } mmap_type_t;
class MMIOManager;
class MMIO {
public:
virtual bool RegisterWriteAPI(uint32_t reg_offset, uint32_t value);
virtual bool RegisterReadAPI(uint32_t reg_offset, uint32_t& value);
virtual void SetIndexDataRegisters(const uint32_t index_reg, const uint32_t data_reg) {
index_reg_addr = (uint32_t*)((char*)pci_memory_ + index_reg);
data_reg_addr = (uint32_t*)((char*)pci_memory_ + data_reg);
}
MMIO(MMIO& other) = delete;
void operator=(const MMIO&) = delete;
virtual ~MMIO();
friend class MMIOManager;
const Agent::AgentInfo& GetAgentInfo() { return *agent_info_; }
mmap_type_t Type() { return type_; }
protected:
MMIO(const Agent::AgentInfo& info);
// default constructor; helpful for derived classes
// which want to setup mmio construction differently
MMIO(){ type_ = DEFAULT_MMAP; };
const Agent::AgentInfo* agent_info_;
struct pci_device* pci_device_;
size_t pci_memory_size_;
uint32_t* pci_memory_;
mmap_type_t type_;
uint32_t* index_reg_addr;
uint32_t* data_reg_addr;
};
// PciePerfmonMMIO has same mmio setup approach as
// done in MMIO class
class PciePerfmonMMIO : public MMIO{
public:
friend class MMIOManager;
protected:
PciePerfmonMMIO(const Agent::AgentInfo& info): MMIO(info) {
type_ = PCIE_PERFMON;
};
};
/*
Class to manage mmio for UMC/DF/PCIe etc.
The mmio approach for the different IPs may
be same or different. For eg: UMC and PCIe share
the same mmio and index/data registers
*/
class MMIOManager {
public:
static MMIO* CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info);
static MMIO* GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info);
static void DestroyMMIOInstance(MMIO* instance);
private:
static void AddInstance(MMIO* instance);
static std::map<decltype(hsa_agent_t::handle), std::vector<MMIO*>> mmio_instances_;
};
} // namespace mmio
} // namespace rocmtools
#endif
@@ -0,0 +1,116 @@
#include "pcie_counters_mi200.h"
#include "pcie_perfmon_registers_mi200.h"
#include "perfmon.h"
namespace rocmtools {
PciePerfMonMI200::PciePerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
mmio_ =
dynamic_cast<mmio::PciePerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::PCIE_PERFMON, info));
}
PciePerfMonMI200::~PciePerfMonMI200() {
mmio::MMIOManager::DestroyMMIOInstance(dynamic_cast<mmio::MMIO*>(mmio_));
}
void PciePerfMonMI200::writeRegister(uint32_t reg_offset, uint32_t value){
// mmio or ioctl approaches
mmio_->RegisterWriteAPI(reg_offset, value);
}
void PciePerfMonMI200::readRegister(uint32_t reg_offset, uint32_t& value){
// mmio or ioctl approaches
mmio_->RegisterReadAPI(reg_offset, value);
}
void PciePerfMonMI200::SetCounterNames(std::vector<std::string>& counter_names) {
counter_names_ = counter_names;
// TODO: only one event at a time is supported at the moment
auto it = PCIE_MI200::pcie_events_table.find(counter_names[0]);
if (it != PCIE_MI200::pcie_events_table.end()) {
PCIE_MI200::pcie_event_t event_desc = it->second;
if (event_desc.event_category == PCIE_MI200::RX_TILE_SCLK) {
event_id_ = event_desc.event_id;
}
}
}
void PciePerfMonMI200::Start(){
// TODO: make sure values stored in table
// in registers header are dec and not hex
Start_RX_TILE_SCLK(event_id_);
}
void PciePerfMonMI200::Stop(){
// TODO: revisit correct value to stop
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x2); // stop
}
void PciePerfMonMI200::Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values){
uint64_t val=0;
Read_RX_TILE_SCLK(val);
rocprofiler_counters_sampler_counter_output_t value = {
ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS,
static_cast<double>(val)
};
values.push_back(value);
}
void PciePerfMonMI200::Start_RX_TILE_TXCLK(uint32_t event){
// Step 1: PORT SEL update
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_EVENT_CI_PORT_SEL, 0x0);
// Step 2: EVENT SEL update
uint32_t value = event; // last 8 bits for event
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_TXCLK3, value);
// Steps 3 & 4: Performance counters initialization, enable:
// TODO: revisit. Just a single write with 0x3 might be enough (check with pcie team)
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x5);
}
void PciePerfMonMI200::Read_RX_TILE_TXCLK(uint64_t& result){
// Step 5: Performance counters read:
uint32_t lo_val, hi_val;
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_TXCLK3, lo_val);
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_UPVAL_TXCLK3, hi_val);
// Combine the lo and hi values and put them in result
uint64_t val = (hi_val & 0xFFFFUL);
val = val << 32;
result = val | lo_val;
}
void PciePerfMonMI200::Start_RX_TILE_SCLK(uint32_t event){
// Step 1: PORT SEL update
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_EVENT_CI_PORT_SEL, 0x0);
// Step 2: EVENT SEL update
uint32_t value = event; // last 8 bits for event
writeRegister(PCIE_MI200::PCIE_PERF_CNTL_LCLK1, value);
// Steps 3 & 4: Performance counters initialization, enable:
// TODO: revisit. Just a single write with 0x3 might be enough (check with pcie team)
writeRegister(PCIE_MI200::PCIE_PERF_COUNT_CNTL, 0x5);
}
void PciePerfMonMI200::Read_RX_TILE_SCLK(uint64_t& result){
// Step 5: Performance counters read:
uint32_t lo_val, hi_val;
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_LCLK1, lo_val);
readRegister(PCIE_MI200::PCIE_PERF_COUNT0_UPVAL_LCLK1, hi_val);
// Combine the lo and hi values and put them in result
uint64_t val = (hi_val & 0xFFFFUL);
val = val << 32;
result = val | lo_val;
}
} // namespace rocmtools
@@ -0,0 +1,43 @@
#ifndef PCIE_COUNTERS_MI200_H
#define PCIE_COUNTERS_MI200_H
#include "mmio.h"
#include "perfmon.h"
namespace rocmtools {
/*
One perfmon per GPU.
Only one instance per GPU, per process
*/
class PciePerfMonMI200 : public PerfMon {
public:
PciePerfMonMI200(const Agent::AgentInfo& info);
~PciePerfMonMI200();
void SetCounterNames(std::vector<std::string>& counter_names) override;
void Start() override;
void Stop() override;
void Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values) override;
mmio::mmap_type_t Type() override { return mmio::mmap_type_t::PCIE_PERFMON; }
private:
// TODO : check google coding std
void writeRegister(uint32_t reg_offset, uint32_t value);
void readRegister(uint32_t reg_offset, uint32_t& value);
void Start_RX_TILE_TXCLK(uint32_t event);
void Read_RX_TILE_TXCLK(uint64_t& result);
void Start_RX_TILE_SCLK(uint32_t event);
void Read_RX_TILE_SCLK(uint64_t& result);
private:
mmio::PciePerfmonMMIO* mmio_;
std::vector<std::string> counter_names_;
int event_id_;
};
} // namespace rocmtools
#endif
@@ -0,0 +1,327 @@
#ifndef PCIE_PERFMON_REGISTERS_MI200_H
#define PCIE_PERFMON_REGISTERS_MI200_H
#include <stdint.h>
namespace PCIE_MI200 {
// -------- RX Tile TXCLK Start --------
// Step 1: PORT SEL update
const static uint32_t PCIE_PERF_CNTL_EVENT_CI_PORT_SEL = 0x11180250;
// Step 2: EVENT SEL update
const static uint32_t PCIE_PERF_CNTL_TXCLK1 = 0x11180204;
const static uint32_t PCIE_PERF_CNTL_TXCLK2 = 0x11180210;
const static uint32_t PCIE_PERF_CNTL_TXCLK3 = 0x1118021C; //#
const static uint32_t PCIE_PERF_CNTL_TXCLK4 = 0x11180228; //#
const static uint32_t PCIE_PERF_CNTL_TXCLK5 = 0x11180258;
const static uint32_t PCIE_PERF_CNTL_TXCLK6 = 0x11180264;
const static uint32_t PCIE_PERF_CNTL_TXCLK7 = 0x11180888;
const static uint32_t PCIE_PERF_CNTL_TXCLK8 = 0x11180894;
const static uint32_t PCIE_PERF_CNTL_TXCLK9 = 0x111808A0;
const static uint32_t PCIE_PERF_CNTL_TXCLK10 = 0x111808AC;
// Steps 3 & 4: Performance counters initialization, enable:
const static uint32_t PCIE_PERF_COUNT_CNTL = 0x11180200;
// Step 5: Performance counters read:
const static uint32_t PCIE_PERF_COUNT0_TXCLK1 = 0x11180208;
const static uint32_t PCIE_PERF_COUNT0_TXCLK2 = 0x11180214;
const static uint32_t PCIE_PERF_COUNT0_TXCLK3 = 0x11180220; //#
const static uint32_t PCIE_PERF_COUNT0_TXCLK4 = 0x1118022C; //#
const static uint32_t PCIE_PERF_COUNT0_TXCLK5 = 0x1118025C;
const static uint32_t PCIE_PERF_COUNT0_TXCLK6 = 0x11180268;
const static uint32_t PCIE_PERF_COUNT0_TXCLK7 = 0x1118088C;
const static uint32_t PCIE_PERF_COUNT0_TXCLK8 = 0x11180898;
const static uint32_t PCIE_PERF_COUNT0_TXCLK9 = 0x111808A4;
const static uint32_t PCIE_PERF_COUNT0_TXCLK10 = 0x111808B0;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK1 = 0x111808E8;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK2 = 0x111808F0;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK3 = 0x111808F8; //#
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK4 = 0x11180900; //#
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK5 = 0x11180908;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK6 = 0x11180910;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK7 = 0x11180918;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK8 = 0x11180920;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK9 = 0x11180928;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_TXCLK10 = 0x11180930;
const static uint32_t PCIE_PERF_COUNT1_TXCLK1 = 0x1118020C;
const static uint32_t PCIE_PERF_COUNT1_TXCLK2 = 0x11180218;
const static uint32_t PCIE_PERF_COUNT1_TXCLK3 = 0x11180224; //#
const static uint32_t PCIE_PERF_COUNT1_TXCLK4 = 0x11180230; //#
const static uint32_t PCIE_PERF_COUNT1_TXCLK5 = 0x11180260;
const static uint32_t PCIE_PERF_COUNT1_TXCLK6 = 0x1118026C;
const static uint32_t PCIE_PERF_COUNT1_TXCLK7 = 0x11180890;
const static uint32_t PCIE_PERF_COUNT1_TXCLK8 = 0x1118089C;
const static uint32_t PCIE_PERF_COUNT1_TXCLK9 = 0x111808A8;
const static uint32_t PCIE_PERF_COUNT1_TXCLK10 = 0x111808B4;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK1 = 0x111808EC;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK2 = 0x111808F4;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK3 = 0x111808FC; //#
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK4 = 0x11180904; //#
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK5 = 0x1118090C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK6 = 0x11180914;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK7 = 0x1118091C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK8 = 0x11180924;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK9 = 0x1118092C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_TXCLK10 = 0x11180934;
// -------- RX Tile TXCLK End --------
// -------- RX Tile SCLK Start --------
// Step 1: PORT SEL update
// PCIE_PERF_CNTL_EVENT_CI_PORT_SEL
// Step 2: EVENT SEL update
const static uint32_t PCIE_PERF_CNTL_LCLK1 = 0x11180234;
const static uint32_t PCIE_PERF_CNTL_LCLK2 = 0x11180240;
const static uint32_t PCIE_PERF_CNTL_LCLK3 = 0x11180270;
const static uint32_t PCIE_PERF_CNTL_LCLK4 = 0x1118027C;
const static uint32_t PCIE_PERF_CNTL_LCLK5 = 0x111808B8;
const static uint32_t PCIE_PERF_CNTL_LCLK6 = 0x111808C4;
const static uint32_t PCIE_PERF_CNTL_LCLK7 = 0x111808D0;
const static uint32_t PCIE_PERF_CNTL_LCLK8 = 0x111808DC;
// Step 5: Performance counters read:
const static uint32_t PCIE_PERF_COUNT0_LCLK1 = 0x11180238;
const static uint32_t PCIE_PERF_COUNT0_LCLK2 = 0x11180244;
const static uint32_t PCIE_PERF_COUNT0_LCLK3 = 0x11180274;
const static uint32_t PCIE_PERF_COUNT0_LCLK4 = 0x11180280;
const static uint32_t PCIE_PERF_COUNT0_LCLK5 = 0x111808BC;
const static uint32_t PCIE_PERF_COUNT0_LCLK6 = 0x111808C8;
const static uint32_t PCIE_PERF_COUNT0_LCLK7 = 0x111808D4;
const static uint32_t PCIE_PERF_COUNT0_LCLK8 = 0x111808E0;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK1 = 0x11180938;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK2 = 0x11180940;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK3 = 0x11180948;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK4 = 0x11180950;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK5 = 0x11180958;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK6 = 0x11180960;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK7 = 0x11180968;
const static uint32_t PCIE_PERF_COUNT0_UPVAL_LCLK8 = 0x11180970;
const static uint32_t PCIE_PERF_COUNT1_LCLK1 = 0x1118023C;
const static uint32_t PCIE_PERF_COUNT1_LCLK2 = 0x11180248;
const static uint32_t PCIE_PERF_COUNT1_LCLK3 = 0x11180278;
const static uint32_t PCIE_PERF_COUNT1_LCLK4 = 0x11180284;
const static uint32_t PCIE_PERF_COUNT1_LCLK5 = 0x111808C0;
const static uint32_t PCIE_PERF_COUNT1_LCLK6 = 0x111808CC;
const static uint32_t PCIE_PERF_COUNT1_LCLK7 = 0x111808D8;
const static uint32_t PCIE_PERF_COUNT1_LCLK8 = 0x111808E4;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK1 = 0x1118093C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK2 = 0x11180944;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK3 = 0x1118094C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK4 = 0x11180954;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK5 = 0x1118095C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK6 = 0x11180964;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK7 = 0x1118096C;
const static uint32_t PCIE_PERF_COUNT1_UPVAL_LCLK8 = 0x11180974;
// -------- RX Tile SCLK End ----------
typedef enum{
TX_TILE_TXCLK = 0,
TX_TILE_SCLK = 1,
RX_TILE_TXCLK = 2,
RX_TILE_SCLK = 3,
LC_TILE_TXCLK = 4
}pcie_event_category_t;
struct pcie_event_t{
pcie_event_t(int id, pcie_event_category_t cat): event_id(id), event_category(cat){}
int event_id;
pcie_event_category_t event_category;
};
const static std::map<std::string, pcie_event_t> pcie_events_table = {
{"RX_PERF_RXP_RX_TailEdb_A[0]", {2, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEdb_A[1]", {3, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEdb_A[2]", {4, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEdb_A[3]", {5, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEnd_A[0]", {6, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEnd_A[1]", {7, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEnd_A[2]", {8, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_TailEnd_A[3]", {9, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadSdp_A[0]", {10, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadSdp_A[1]", {11, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadSdp_A[2]", {12, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadSdp_A[3]", {13, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadStp_A[0]", {14, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadStp_A[1]", {15, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadStp_A[2]", {16, RX_TILE_TXCLK}},
{"RX_PERF_RXP_RX_HeadStp_A[3]", {17, RX_TILE_TXCLK}},
{"RX_PERF_RXCRC_nullified_tlp_A", {18, RX_TILE_TXCLK}},
{"RX_PERF_RXCRC_valid_crc_A", {19, RX_TILE_TXCLK}},
{"RX_PERF_RXCRC_invalid_crc_A", {20, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_vendor_type1_A", {21, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_vendor_type0_A", {22, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_set_slot_power_limit_A", {23, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_unlock_A", {24, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_err_fatal_A", {25, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_err_nonfatal_A", {26, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_err_corr_A", {27, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_pme_to_ack_A", {28, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_pme_turn_off_A", {29, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_pm_pme_A", {30, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_pm_active_state_nak_A", {31, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_deassert_intd_A", {32, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_deassert_intc_A", {33, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_deassert_intb_A", {34, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_deassert_inta_A", {35, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_assert_intd_A", {36, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_assert_intc_A", {37, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_assert_intb_A", {38, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_assert_inta_A", {39, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_valid_A", {40, RX_TILE_TXCLK}},
{"RX_PERF_RMSG_unsupported_A", {41, RX_TILE_TXCLK}},
{"RX_PERF_RCB_unexpected_cpl_A", {42, RX_TILE_TXCLK}},
{"RX_PERF_RCB_timeout_cpl_A", {43, RX_TILE_TXCLK}},
{"RX_PERF_HDS_tlphdrvalid_A", {44, RX_TILE_TXCLK}},
{"RX_PERF_HDS_tlpdatavalid_A", {45, RX_TILE_TXCLK}},
{"RX_PERF_GAN_bad_tlp_A", {46, RX_TILE_TXCLK}},
{"RX_PERF_GAN_nak_A", {47, RX_TILE_TXCLK}},
{"RX_PERF_GAN_ack_A", {48, RX_TILE_TXCLK}},
{"RX_PERF_FE_unsupported_req_A", {49, RX_TILE_TXCLK}},
{"RX_PERF_FE_unsupported_cpl_A", {50, RX_TILE_TXCLK}},
{"RX_PERF_FE_unexpected_cpl_A", {51, RX_TILE_TXCLK}},
{"RX_PERF_FE_poisoned_tlp_A", {52, RX_TILE_TXCLK}},
{"RX_PERF_FE_poisoned_cpl_A", {53, RX_TILE_TXCLK}},
{"RX_PERF_FE_malformed_tlp_A", {54, RX_TILE_TXCLK}},
{"RX_PERF_FE_cpl_abort_A", {55, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_MSG_A", {56, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_CFG_WR_A", {57, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_CFG_RD_A", {58, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_IO_WR_A", {59, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_IO_RD_A", {60, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_MEM_WR_A", {61, RX_TILE_TXCLK}},
{"RX_PERF_FE_request_MEM_RD_A", {62, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_MST_gt16_A", {63, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_MST_9to16_A", {64, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_MST_5to8_A", {65, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_MST_2to4_A", {66, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_MST_1_A", {67, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_gt32_A", {68, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_17to32_A", {69, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_9to16_A", {70, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_5to8_A", {71, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_2to4_A", {72, RX_TILE_TXCLK}},
{"RX_PERF_FE_length_SLV_1_A", {73, RX_TILE_TXCLK}},
{"RX_PERF_FE_cpl_status_CA_A", {74, RX_TILE_TXCLK}},
{"RX_PERF_FE_cpl_status_CRS_A", {75, RX_TILE_TXCLK}},
{"RX_PERF_FE_cpl_status_UR_A", {76, RX_TILE_TXCLK}},
{"RX_PERF_FE_cpl_status_SC_A", {77, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_pm_active_state_request_l1_A", {78, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_pm_request_ack_A", {79, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_pm_enter_l23_A", {80, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_pm_enter_l1_A", {81, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_error_A", {82, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_crc_err_A", {83, RX_TILE_TXCLK}},
{"SB_PERF_FCC_npd_0", {84, RX_TILE_TXCLK}},
{"SB_PERF_FCC_pd_0", {85, RX_TILE_TXCLK}},
{"SB_PERF_FCC_nph_0", {86, RX_TILE_TXCLK}},
{"SB_PERF_FCC_ph_0", {87, RX_TILE_TXCLK}},
{"SB_PERF_fail_crc_rd_hdr_0", {88, RX_TILE_TXCLK}},
{"SB_PERF_pass_crc_rd_hdr_0", {89, RX_TILE_TXCLK}},
{"SB_PERF_fail_crc_wr_hdr_0", {90, RX_TILE_TXCLK}},
{"SB_PERF_pass_crc_wr_hdr_0", {91, RX_TILE_TXCLK}},
{"SB_PERF_fail_crc_data_0", {92, RX_TILE_TXCLK}},
{"SB_PERF_pass_crc_data_0", {93, RX_TILE_TXCLK}},
{"SB_PERF_invalid_crc_0", {94, RX_TILE_TXCLK}},
{"SB_PERF_valid_crc_0", {95, RX_TILE_TXCLK}},
{"SB_PERF_rd_hdr_WEN_0", {96, RX_TILE_TXCLK}},
{"SB_PERF_wr_hdr_WEN_0", {97, RX_TILE_TXCLK}},
{"SB_PERF_data_WEN_0", {98, RX_TILE_TXCLK}},
{"SB_PERF_non_post_rd_from_FE", {99, RX_TILE_TXCLK}},
{"SB_PERF_non_post_wr_from_FE", {100, RX_TILE_TXCLK}},
{"SB_PERF_post_req_from_FE", {101, RX_TILE_TXCLK}},
{"SB_PERF_non_post_rd_from_FE_0", {102, RX_TILE_TXCLK}},
{"SB_PERF_non_post_wr_from_FE_0", {103, RX_TILE_TXCLK}},
{"SB_PERF_post_req_from_FE_0", {104, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_nak_A", {111, RX_TILE_TXCLK}},
{"RX_PERF_DLLP_ack_A", {112, RX_TILE_TXCLK}},
{"RX_PERF_allErrors_A", {113, RX_TILE_TXCLK}},
{"perf_PG_COUNT", {175, RX_TILE_TXCLK}},
{"perf_NOT_POWER_GATED", {176, RX_TILE_TXCLK}},
{"perf_POWER_GATED", {177, RX_TILE_TXCLK}},
{"SB_PERF_non_post_rd_to_HI", {2, RX_TILE_SCLK}},
{"SB_PERF_non_post_wr_to_HI", {3, RX_TILE_SCLK}},
{"SB_PERF_post_req_to_HI", {4, RX_TILE_SCLK}},
{"SB_PERF_non_post_rd_to_HI_0", {5, RX_TILE_SCLK}},
{"SB_PERF_non_post_wr_to_HI_0", {6, RX_TILE_SCLK}},
{"SB_PERF_post_req_to_HI_0", {7, RX_TILE_SCLK}},
{"SB_PERF_rd_hdr_REN_0", {8, RX_TILE_SCLK}},
{"SB_PERF_wr_hdr_REN_0", {9, RX_TILE_SCLK}},
{"SB_PERF_data_REN_0", {10, RX_TILE_SCLK}},
{"SB_PERF_rd_hdr_empty_0", {11, RX_TILE_SCLK}},
{"SB_PERF_wr_hdr_empty_0", {12, RX_TILE_SCLK}},
{"SB_PERF_data_empty_0", {13, RX_TILE_SCLK}},
{"CI_PERF_slv_total128BRdCpl", {29, RX_TILE_SCLK}},
{"CI_PERF_slv_total32BMemRdTx", {30, RX_TILE_SCLK}},
{"CI_PERF_slv_total64BMemRdTx", {31, RX_TILE_SCLK}},
{"CI_PERF_slv_total16BMemWrTx", {32, RX_TILE_SCLK}},
{"CI_PERF_slv_total32BMemWrTx", {33, RX_TILE_SCLK}},
{"CI_PERF_slv_total64BMemWrTx", {34, RX_TILE_SCLK}},
{"CI_PERF_slv_totalTx", {35, RX_TILE_SCLK}},
{"CI_PERF_slv_stallGrantGen", {36, RX_TILE_SCLK}},
{"CI_PERF_slv_totalGrant", {37, RX_TILE_SCLK}},
{"CI_PERF_slv_txPending", {38, RX_TILE_SCLK}},
{"CI_PERF_slv_numMemRdLT32B", {39, RX_TILE_SCLK}},
{"CI_PERF_slv_numMemRdLT16B", {40, RX_TILE_SCLK}},
{"CI_PERF_slv_totalMemTx", {41, RX_TILE_SCLK}},
{"CI_PERF_slv_totalMemRdTx", {42, RX_TILE_SCLK}},
{"CI_PERF_slv_totalMemWrTx", {43, RX_TILE_SCLK}},
{"CI_PERF_slv_numGrant0", {44, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntOverFlow_ns0", {45, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntUnderFlow_ns0", {46, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntOverFlow_s0", {47, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntUnderFlow_s0", {48, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntOverFlow0", {49, RX_TILE_SCLK}},
{"CI_PERF_slv_portCntUnderFlow0", {50, RX_TILE_SCLK}},
{"CI_PERF_slv_npNotAccepted_ns0", {51, RX_TILE_SCLK}},
{"CI_PERF_slv_npNotAccepted_s0", {52, RX_TILE_SCLK}},
{"CI_PERF_slv_num128BRdCpl0", {53, RX_TILE_SCLK}},
{"CI_PERF_slv_num32BMemRdTx0", {54, RX_TILE_SCLK}},
{"CI_PERF_slv_num64BMemRdTx0", {55, RX_TILE_SCLK}},
{"CI_PERF_slv_num16BMemWrTx0", {56, RX_TILE_SCLK}},
{"CI_PERF_slv_num32BMemWrTx0", {57, RX_TILE_SCLK}},
{"CI_PERF_slv_num64BMemWrTx0", {58, RX_TILE_SCLK}},
{"CI_PERF_slv_MemRd_Bandwidth0", {59, RX_TILE_SCLK}},
{"CI_PERF_slv_MemWr_Bandwidth0", {60, RX_TILE_SCLK}},
{"TX_PERF_S_RCLK_s_tag_buf_empty", {61, RX_TILE_SCLK}},
{"P_request_latency_500ns_or_more", {62, RX_TILE_SCLK}},
{"P_request_latency_250_to_500ns", {63, RX_TILE_SCLK}},
{"P_request_latency_100_to_250ns", {64, RX_TILE_SCLK}},
{"P_request_latency_100ns_or_less", {65, RX_TILE_SCLK}},
{"NP_request_latency_500ns_or_more", {66, RX_TILE_SCLK}},
{"NP_request_latency_250_to_500ns", {67, RX_TILE_SCLK}},
{"NP_request_latency_100_to_250ns", {68, RX_TILE_SCLK}},
{"NP_request_latency_100ns_or_less", {69, RX_TILE_SCLK}},
{"CI_PERF_slv_MemRd_wait_for_cpl_slot[0]", {70, RX_TILE_SCLK}},
{"CI_PERF_slv_MemRd_wait_for_tag[0]", {71, RX_TILE_SCLK}},
{"CI_PERF_slv_MemRd_wait_for_d_credit[0]", {72, RX_TILE_SCLK}},
{"CI_PERF_slv_MemRd_wait_for_h_credit[0]", {73, RX_TILE_SCLK}},
{"CI_PERF_slv_MemWr_wait_for_tag[0]", {74, RX_TILE_SCLK}},
{"CI_PERF_slv_MemWr_wait_for_d_credit[0]", {75, RX_TILE_SCLK}},
{"CI_PERF_slv_MemWr_wait_for_h_credit[0]", {76, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC1_no_tags_q", {77, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC1_data_credits_q", {78, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC1_req_credits_q", {79, RX_TILE_SCLK}},
{"CISLV_PERF_no_cpl_slots_q[0]", {80, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC0_no_tags_q", {81, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC0_data_credits_q", {82, RX_TILE_SCLK}},
{"CISLV_PERF_no_VC0_req_credits_q", {83, RX_TILE_SCLK}}
};
}
#endif
@@ -0,0 +1,47 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_CORE_COUNTERS_PERFMON_H
#define SRC_CORE_COUNTERS_PERFMON_H
#include "inc/rocprofiler.h"
#include "mmio.h"
#include <vector>
namespace rocmtools {
class PerfMon {
public:
virtual ~PerfMon(){};
virtual void Start() = 0;
virtual void Stop() = 0;
virtual void Read(std::vector<rocprofiler_counters_sampler_counter_output_t>& values) = 0;
virtual void SetCounterNames(std::vector<std::string>& counter_names) {
counter_names_ = counter_names;
};
virtual mmio::mmap_type_t Type() = 0;
protected:
std::vector<std::string> counter_names_;
};
} // namespace rocmtools
#endif
@@ -220,11 +220,18 @@ bool GetNextRecord(const rocprofiler_record_header_t* record,
*next = reinterpret_cast<const rocprofiler_record_header_t*>(att_tracer_record + 1);
break;
}
case ROCPROFILER_COUNTERS_SAMPLER_RECORD: {
const rocprofiler_record_counters_sampler_t* sampler_record =
reinterpret_cast<const rocprofiler_record_counters_sampler_t*>(record);
*next = reinterpret_cast<const rocprofiler_record_header_t*>(sampler_record + 1);
break;
}
default:
const rocprofiler_record_tracer_t* tracer_record =
reinterpret_cast<const rocprofiler_record_tracer_t*>(record);
*next = reinterpret_cast<const rocprofiler_record_header_t*>(tracer_record + 1);
// size_to_add = sizeof(rocprofiler_record_header_t);
break;
}
// const std::byte* ptr = reinterpret_cast<const std::byte*>(record);
// ptr += size_to_add;
@@ -0,0 +1,168 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "counters_sampler.h"
#include "src/core/hsa/hsa_support.h"
#include "src/api/rocmtool.h"
#include "src/core/counters/mmio/pcie_counters_mi200.h"
namespace rocmtools {
CountersSampler::CountersSampler(
rocprofiler_buffer_id_t buffer_id,
rocprofiler_filter_id_t filter_id,
rocprofiler_session_id_t session_id)
: buffer_id_(buffer_id)
, filter_id_(filter_id)
, session_id_(session_id)
, pci_system_initialized_(pci_system_init() == 0)
{
params_ = rocmtools::GetROCMToolObj()
->GetSession(session_id_)
->GetFilter(filter_id_)
->GetCountersSamplerParameterData();
std::vector<hsa_agent_t> agents;
rocmtools::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* arg) {
auto& agents = *reinterpret_cast<std::vector<hsa_agent_t>*>(arg);
const auto& ai = rocmtools::hsa_support::GetAgentInfo(agent.handle);
if (ai.getType() == HSA_DEVICE_TYPE_GPU) {
agents.emplace_back(agent);
}
return HSA_STATUS_SUCCESS;
},
&agents);
// create perfmon instances for the counter types specfied
// PCIE counters
std::vector<std::string> pcie_counter_names;
for (int i = 0; i < params_.counters_num; i++) {
if(params_.counters[i].type == ROCPROFILER_COUNTERS_SAMPLER_PCIE_COUNTERS)
pcie_counter_names.push_back(params_.counters[i].name);
}
if (pcie_counter_names.size() > 0) {
auto agentInfo = rocmtools::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
if(agentInfo.getName() == "gfx90a"){
PciePerfMonMI200* perfmon = new PciePerfMonMI200(agentInfo);
perfmon->SetCounterNames(pcie_counter_names);
perfmon_instances_.push_back(perfmon);
}
}
}
CountersSampler::~CountersSampler() {
// cleanup perfmon instancess
for (auto &perfmon : perfmon_instances_) {
if(perfmon != nullptr)
delete perfmon;
}
// clean up libpcieaccess resources
// TODO: should be part of mmio class in future
if (pci_system_initialized_) {
pci_system_cleanup();
pci_system_initialized_ = false;
}
}
void CountersSampler::Start() {
if (sampler_thread_.joinable()) { return; }
std::cout << "Sampler Start\n";
// Start all Perfmons
for (auto& perfmon : perfmon_instances_) {
perfmon->Start();
}
// Start polling thread
keep_running_ = true;
sampler_thread_ = std::thread([this]() { SamplerLoop(); });
}
void CountersSampler::Stop() {
if (!sampler_thread_.joinable()) { return; }
std::cout << "Sampler Stop\n";
// Stop all Perfmons
for (auto& perfmon : perfmon_instances_) {
perfmon->Stop();
}
// Stop polling thread
keep_running_ = false;
sampler_thread_.join();
}
void CountersSampler::AddRecord(rocprofiler_record_counters_sampler_t &record) {
const auto tool = rocmtools::GetROCMToolObj();
const auto session = tool->GetSession(session_id_);
const auto buffer = session->GetBuffer(buffer_id_);
std::lock_guard<std::mutex> lk(session->GetSessionLock());
record.header = {
ROCPROFILER_COUNTERS_SAMPLER_RECORD,
{ tool->GetUniqueRecordId() }
};
// Add the record to the buffer(a deep-copy operation) along with
// a lambda function to deep-copy the record.counters member to
// the newly created buffer record
buffer->AddRecord(
record, record.counters,
(record.num_counters * (sizeof(rocprofiler_counters_sampler_counter_output_t) + 1)),
[](auto& buff_record, const void* data) {
buff_record.counters = const_cast<rocprofiler_counters_sampler_counter_output_t*>(
static_cast<const rocprofiler_counters_sampler_counter_output_t*>(data));
});
}
void CountersSampler::SamplerLoop() {
std::this_thread::sleep_until(std::chrono::steady_clock::now() +
std::chrono::milliseconds(params_.initial_delay));
uint32_t elapsed = 0;
while (keep_running_ && (elapsed <= params_.sampling_duration)) {
auto next_tick =
std::chrono::steady_clock::now() + std::chrono::milliseconds(params_.sampling_rate);
rocprofiler_record_counters_sampler_t record;
std::vector<rocprofiler_counters_sampler_counter_output_t> values;
for (auto& perfmon : perfmon_instances_){
perfmon->Read(values);
}
record.counters =
static_cast<rocprofiler_counters_sampler_counter_output_t*>(
malloc(values.size() * sizeof(rocprofiler_counters_sampler_counter_output_t)));
::memcpy(record.counters, &(values)[0],
values.size() * sizeof(rocprofiler_counters_sampler_counter_output_t));
record.num_counters = values.size();
rocprofiler_counters_sampler_counter_output_t* record_counters = record.counters;
AddRecord(record);
free(record_counters);
std::this_thread::sleep_until(next_tick);
elapsed += params_.sampling_rate;
}
}
} // namespace rocmtools
@@ -0,0 +1,62 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_CORE_SESSION_COUNTERS_SAMPLER_H_
#define SRC_CORE_SESSION_COUNTERS_SAMPLER_H_
#include "rocprofiler.h"
#include "src/core/counters/mmio/perfmon.h"
#include <atomic>
#include <thread>
#include <mutex>
#include <vector>
namespace rocmtools {
class CountersSampler {
public:
CountersSampler(rocprofiler_buffer_id_t buffer_id, rocprofiler_filter_id_t filter_id,
rocprofiler_session_id_t session_id);
~CountersSampler();
CountersSampler(const CountersSampler&) = delete;
CountersSampler& operator=(const CountersSampler&) = delete;
void Start();
void Stop();
void AddRecord(rocprofiler_record_counters_sampler_t& record);
private:
void SamplerLoop();
rocprofiler_buffer_id_t buffer_id_;
rocprofiler_filter_id_t filter_id_;
rocprofiler_session_id_t session_id_;
bool pci_system_initialized_{false};
rocprofiler_counters_sampler_parameters_t params_;
std::vector<PerfMon*> perfmon_instances_;
std::atomic<bool> keep_running_{false};
std::thread sampler_thread_;
};
} // namespace rocmtools
#endif
@@ -67,10 +67,15 @@ Filter::Filter(rocprofiler_filter_id_t id, rocprofiler_filter_kind_t filter_kind
}
break;
}
case ROCPROFILER_COUNTERS_SAMPLER: {
counters_sampler_parameters_ = filter_data.counters_sampler_parameters;
break;
}
default: {
warning(
"Error: ROCProfiler filter specified is not supported for "
"profiler mode!\n");
break;
}
}
}
@@ -122,6 +127,16 @@ rocprofiler_spm_parameter_t* Filter::GetSpmParameterData() {
"Error: ROCProfiler filter specified is not supported for "
"SPM collection mode!\n");
}
rocprofiler_counters_sampler_parameters_t Filter::GetCountersSamplerParameterData() {
if (kind_ == ROCPROFILER_COUNTERS_SAMPLER) {
return counters_sampler_parameters_;
}
fatal(
"Error: ROCMtools filter specified is not supported for "
"Counters sampler mode!\n");
}
void Filter::SetProperty(rocprofiler_filter_property_t property) {
switch (property.kind) {
case ROCPROFILER_FILTER_HSA_TRACER_API_FUNCTIONS: {
@@ -56,6 +56,7 @@ class Filter {
rocprofiler_filter_property_kind_t kind);
size_t GetPropertiesCount(rocprofiler_filter_property_kind_t kind);
rocprofiler_spm_parameter_t* GetSpmParameterData();
rocprofiler_counters_sampler_parameters_t GetCountersSamplerParameterData();
private:
rocprofiler_filter_id_t id_;
@@ -72,6 +73,7 @@ class Filter {
std::vector<rocprofiler_tracer_activity_domain_t> tracer_apis_; // ROCTX/HIP/HSA API
rocprofiler_spm_parameter_t* spm_parameter_; // spm parameter
std::vector<rocprofiler_att_parameter_t> att_parameters_; // ATT Parameters
rocprofiler_counters_sampler_parameters_t counters_sampler_parameters_; // sampled counters parameters
rocprofiler_sync_callback_t callback_;
};
@@ -148,6 +148,16 @@ void Session::Start() {
pc_sampler_->Start();
}
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
if (!counters_sampler_started_.load(std::memory_order_release)) {
counters_sampler_ = new CountersSampler(
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetBufferId(),
GetFilter(GetFilterIdWithKind(ROCPROFILER_COUNTERS_SAMPLER))->GetId(), session_id_);
counters_sampler_started_.exchange(true, std::memory_order_release);
}
counters_sampler_->Start();
}
is_active_ = true;
if (FindFilterWithKind(ROCPROFILER_SPM_COLLECTION)) startSpm();
}
@@ -179,6 +189,14 @@ void Session::Terminate() {
}
}
if (FindFilterWithKind(ROCPROFILER_COUNTERS_SAMPLER)) {
if (counters_sampler_started_.load(std::memory_order_release)) {
counters_sampler_->Stop();
delete counters_sampler_;
counters_sampler_started_.exchange(false, std::memory_order_release);
}
}
is_active_ = false;
}
}
@@ -191,6 +209,7 @@ att::AttTracer* Session::GetAttTracer() { return att_tracer_; }
tracer::Tracer* Session::GetTracer() { return tracer_; }
spm::SpmCounters* Session::GetSpmCounter() { return spmcounter_; }
pc_sampler::PCSampler* Session::GetPCSampler() { return pc_sampler_; }
CountersSampler* Session::GetCountersSampler() { return counters_sampler_; }
rocprofiler_filter_id_t Session::CreateFilter(rocprofiler_filter_kind_t filter_kind,
rocprofiler_filter_data_t filter_data,
@@ -40,6 +40,7 @@
#include "att/att.h"
#include "spm/spm.h"
#include "src/pcsampler/session/pc_sampler.h"
#include "counters_sampler.h"
#define ASSERTM(exp, msg) assert(((void)msg, exp))
@@ -62,6 +63,7 @@ class Session {
att::AttTracer* GetAttTracer();
spm::SpmCounters* GetSpmCounter();
pc_sampler::PCSampler* GetPCSampler();
CountersSampler* GetCountersSampler();
// Filter
rocprofiler_filter_id_t CreateFilter(rocprofiler_filter_kind_t filter_kind,
@@ -117,6 +119,9 @@ class Session {
std::atomic<bool> pc_sampler_started_{false};
pc_sampler::PCSampler* pc_sampler_;
std::atomic<bool> counters_sampler_started_{false};
CountersSampler* counters_sampler_;
std::atomic<uint64_t> buffers_counter_{1};
std::mutex buffers_lock_;
std::map<uint64_t, Memory::GenericBuffer*> buffers_;
@@ -23,6 +23,7 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
@@ -57,6 +58,7 @@ file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
@@ -70,6 +72,7 @@ add_executable(runCoreUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/gtests_main.cpp
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_HW_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${ROCPROFILER_SRC_FILES}
@@ -81,6 +84,7 @@ add_executable(runCoreUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/gtests_main.cpp
${ROCPROFILER_TRACER_SRC_FILES}
${ROCPROFILER_ROCTRACER_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES}
${OLD_LIB_SRC})
@@ -13,6 +13,7 @@ set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
@@ -52,6 +53,7 @@ file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/rocmtools/tools/t
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest.cpp
${CMAKE_CURRENT_SOURCE_DIR}/api/rocmtool_gtest.cpp
@@ -59,6 +61,7 @@ add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_HW_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${ROCPROFILER_SPM_SRC_FILES}
@@ -70,6 +73,7 @@ add_executable(runProfilerUnitTests ${CMAKE_CURRENT_SOURCE_DIR}/tools/tool_gtest
${ROCPROFILER_TRACER_SRC_FILES}
${ROCPROFILER_ROCTRACER_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES})