SWDEV-399631: Converting into singleton class

Rocprofiler and HSAsupport classes have been implemented as
singletons which gets initialized lazily.

Change-Id: I98db4713c7282d88966aeb0ea9df83ba457b2ea3


[ROCm/rocprofiler commit: 4980409c5a]
Этот коммит содержится в:
Sriraksha Nagaraj
2023-04-12 11:26:18 -05:00
родитель 162eba6642
Коммит ca53c3f18d
45 изменённых файлов: 1995 добавлений и 1335 удалений
+2 -1
Просмотреть файл
@@ -72,7 +72,7 @@ if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
if [ -z "$RUN_TEST" ] ; then RUN_TEST=no; fi
if [ -z "$ASAN" ] ; then ASAN=False; fi
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi
ROCPROFILER_ROOT=$(cd $ROCPROFILER_ROOT && echo $PWD)
@@ -96,6 +96,7 @@ cmake \
-DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
-DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
-DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
-DHIP_ROOT_DIR=${ROCM_PATH} \
$ROCPROFILER_ROOT
popd
+59 -13
Просмотреть файл
@@ -23,6 +23,7 @@
#include <sys/syscall.h>
#include <unistd.h>
#include <experimental/filesystem>
#include <atomic>
#include <optional>
#include <thread>
@@ -34,6 +35,7 @@
#include "src/utils/logger.h"
#include "src/core/memory/generic_buffer.h"
namespace fs = std::experimental::filesystem;
#define ASSERTM(exp, msg) assert(((void)msg, exp))
extern std::mutex sessions_pending_signal_lock;
@@ -42,14 +44,37 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); }
namespace rocprofiler {
ROCProfiler_Singleton* rocprofiler_singleton;
// Constructor of ROCProfiler
// Takes the buffer size, a buffer callback function and a buffer flush
// interval to allocate a buffer pool using GenericStorage Also takes the
// replay mode (application replay/kernel replay/user replay) to set the replay
// mode for the rocprofiler class object
ROCProfiler_Singleton::ROCProfiler_Singleton() : current_session_id_(rocprofiler_session_id_t{0}) {}
ROCProfiler_Singleton::ROCProfiler_Singleton() {
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
if (!fs::exists(sysfs_nodes_path))
rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
fs::path node_path = dirp_entry.path();
long long node_id = std::stoll(dirp_entry.path().stem().string());
fs::path gpu_path = node_path / "gpu_id";
std::ifstream gpu_id_file(gpu_path.c_str());
std::string gpu_id_str;
if (gpu_id_file.is_open()) {
gpu_id_file >> gpu_id_str;
if (!gpu_id_str.empty()) {
long long gpu_id = std::stoll(gpu_id_str);
if (gpu_id > 0) {
Agent::DeviceInfo deviceInfo(node_id, gpu_id);
// Since it is in static initializer, so its protected
agent_device_map_.emplace(deviceInfo.getGPUId(), deviceInfo);
}
}
}
}
}
// Destructor of rocprofiler_singleton
// deletes the buffer pool
@@ -68,7 +93,11 @@ ROCProfiler_Singleton::~ROCProfiler_Singleton() {
}
Counter::ClearBasicCounters();
}
ROCProfiler_Singleton& ROCProfiler_Singleton::GetInstance() {
static ROCProfiler_Singleton* rocprofiler_singleton_instance =
new ROCProfiler_Singleton;
return *rocprofiler_singleton_instance;
}
bool ROCProfiler_Singleton::FindAgent(rocprofiler_agent_id_t agent_id) { return true; }
size_t ROCProfiler_Singleton::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind,
rocprofiler_agent_id_t agent_id) {
@@ -210,28 +239,45 @@ const char* ROCProfiler_Singleton::GetKernelInfo(rocprofiler_kernel_info_kind_t
}
}
const Agent::DeviceInfo& ROCProfiler_Singleton::GetDeviceInfo(uint64_t gpu_id) {
std::lock_guard<std::mutex> info_map_lock(agent_device_map_mutex_);
auto it = agent_device_map_.find(gpu_id);
if (it == agent_device_map_.end())
rocprofiler::fatal("Device Info is not found for the given id:%ld", gpu_id);
return it->second;
}
// TODO(aelwazir): To be implemented
bool ROCProfiler_Singleton::CheckFilterData(rocprofiler_filter_kind_t filter_kind,
rocprofiler_filter_data_t filter_data) {
return true;
}
// End of ROCProfiler_Singleton Class
rocprofiler_timestamp_t ROCProfiler_Singleton::timestamp_ns() {
ROCProfiler_Singleton* GetROCProfilerSingleton() { return rocprofiler_singleton; }
static uint64_t sys_clock_period = 0;
struct timespec ts;
// We are not full on memory model. At worst each CPU can call it once.
// We don't care for this variable's value in global memory as long as we have
// non-zero in the CPU cache.
if (sys_clock_period == 0) {
clock_getres(CLOCK_BOOTTIME, &ts);
sys_clock_period = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec));
}
void InitROCProfilerSingleton() { rocprofiler_singleton = new ROCProfiler_Singleton; }
void ResetROCProfilerSingleton() {
delete rocprofiler_singleton;
// TODO(aelwazir): We need to use std::optional or std::unique_ptr
// if (rocprofiler_singleton) rocprofiler_singleton.reset();
clock_gettime(CLOCK_BOOTTIME, &ts);
uint64_t time = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec));
if (sys_clock_period != 1)
return rocprofiler_timestamp_t{time / sys_clock_period};
else
return rocprofiler_timestamp_t{time};
}
rocprofiler_timestamp_t GetCurrentTimestamp() { return hsa_support::GetCurrentTimestampNS(); }
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) {
if (hsa_support::IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS;
if (hsa_support_IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS;
return ROCPROFILER_STATUS_ERROR;
}
// End of ROCProfiler_Singleton Class
} // namespace rocprofiler
+11 -10
Просмотреть файл
@@ -41,13 +41,15 @@
#include "src/core/session/session.h"
#include "src/core/session/device_profiling.h"
#include "src/core/hardware/hsa_info.h"
namespace rocprofiler {
class ROCProfiler_Singleton {
public:
ROCProfiler_Singleton();
~ROCProfiler_Singleton();
ROCProfiler_Singleton(const ROCProfiler_Singleton&) = delete;
ROCProfiler_Singleton& operator=(const ROCProfiler_Singleton&) = delete;
static ROCProfiler_Singleton& GetInstance();
bool FindAgent(rocprofiler_agent_id_t agent_id);
size_t GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
@@ -84,7 +86,8 @@ class ROCProfiler_Singleton {
rocprofiler_filter_data_t filter_data);
uint64_t GetUniqueRecordId();
uint64_t GetUniqueKernelDispatchId();
const Agent::DeviceInfo& GetDeviceInfo(uint64_t gpu_id);
rocprofiler_timestamp_t timestamp_ns();
private:
rocprofiler_session_id_t current_session_id_{0};
std::mutex session_map_lock_;
@@ -92,6 +95,11 @@ class ROCProfiler_Singleton {
std::atomic<uint64_t> records_counter_{1};
std::mutex device_profiling_session_map_lock_;
std::map<uint64_t, DeviceProfileSession*> dev_profiling_sessions_;
std::mutex agent_device_map_mutex_;
std::unordered_map<uint64_t, Agent::DeviceInfo> agent_device_map_;
ROCProfiler_Singleton();
~ROCProfiler_Singleton();
/*
* XXX: Associating PC samples with a running kernel requires an identifier
* that will be unique across all kernel executions. It is not enough to use
@@ -110,14 +118,7 @@ class ROCProfiler_Singleton {
std::atomic<uint64_t> kernel_dispatch_counter_{1};
};
void InitROCProfilerSingleton();
void ResetROCProfilerSingleton();
ROCProfiler_Singleton* GetROCProfilerSingleton();
rocprofiler_timestamp_t GetCurrentTimestamp();
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
} // namespace rocprofiler
#endif // SRC_TOOLS_ROCPROFILER_SINGLETON_H_
+110 -111
Просмотреть файл
@@ -132,7 +132,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
API_METHOD_PREFIX
if (api_started.load(std::memory_order_relaxed))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED);
rocprofiler::InitROCProfilerSingleton();
rocprofiler::ROCProfiler_Singleton::GetInstance();
api_started.exchange(true, std::memory_order_release);
API_METHOD_SUFFIX
}
@@ -140,14 +140,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
// Finalize the API
ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() {
API_INIT_CHECKER
rocprofiler::ResetROCProfilerSingleton();
api_started.exchange(false, std::memory_order_release);
API_METHOD_SUFFIX
}
ROCPROFILER_API rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) {
API_INIT_CHECKER
*timestamp = rocprofiler::GetCurrentTimestamp();
*timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns();
if (timestamp->value <= 0)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE);
API_METHOD_SUFFIX
@@ -163,20 +162,21 @@ rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info_size(
rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, size_t* data_size) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindAgent(agent_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
*data_size = rocprofiler::GetROCProfilerSingleton()->GetAgentInfoSize(kind, agent_id);
if (*data_size <= 0)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
*data_size = rocprofiler_singleton.GetAgentInfoSize(kind, agent_id);
if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
API_METHOD_SUFFIX
}
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, const char** data) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindAgent(agent_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetAgentInfo(kind, agent_id)))
if (!(*data = rocprofiler_singleton.GetAgentInfo(kind, agent_id)))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
API_METHOD_SUFFIX
}
@@ -184,20 +184,21 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info_size(
rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, size_t* data_size) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindQueue(queue_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
*data_size = rocprofiler::GetROCProfilerSingleton()->GetQueueInfoSize(kind, queue_id);
if (*data_size <= 0)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
*data_size = rocprofiler_singleton.GetQueueInfoSize(kind, queue_id);
if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
API_METHOD_SUFFIX
}
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, const char** data) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindQueue(queue_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetQueueInfo(kind, queue_id)))
if (!(*data = rocprofiler_singleton.GetQueueInfo(kind, queue_id)))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
API_METHOD_SUFFIX
}
@@ -205,9 +206,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, size_t* data_size) {
API_INIT_CHECKER
// if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id))
// if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id))
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
*data_size = rocprofiler::GetROCProfilerSingleton()->GetKernelInfoSize(kind, kernel_id);
*data_size = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfoSize(kind, kernel_id);
if (*data_size <= 0)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
API_METHOD_SUFFIX
@@ -216,9 +217,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info(
rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, const char** data) {
API_INIT_CHECKER
// if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id))
// if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id))
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetKernelInfo(kind, kernel_id)))
if (!(*data = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfo(kind, kernel_id)))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
API_METHOD_SUFFIX
}
@@ -227,14 +228,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info_size(
rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
rocprofiler_counter_id_t counter_id, size_t* data_size) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetProfiler()
->FindCounter(counter_id))
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
*data_size = rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetProfiler()
*data_size = rocprofiler_singleton
.GetSession(session_id)
->GetProfiler()
->GetCounterInfoSize(kind, counter_id);
if (*data_size <= 0)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
@@ -245,13 +244,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info(
rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
rocprofiler_counter_id_t counter_id, const char** data) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetProfiler()
->FindCounter(counter_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
if (!(*data = rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
if (!(*data = rocprofiler_singleton
.GetSession(session_id)
->GetProfiler()
->GetCounterInfo(kind, counter_id)))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
@@ -280,14 +277,12 @@ rocprofiler_query_tracer_operation_id(rocprofiler_tracer_activity_domain_t domai
ROCPROFILER_API rocprofiler_status_t rocprofiler_flush_data(rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetBuffer(buffer_id)
->Flush())
if (!rocprofiler_singleton.GetSession(session_id)->GetBuffer(buffer_id)->Flush())
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER);
API_METHOD_SUFFIX
}
@@ -298,9 +293,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
const rocprofiler_record_header_t* record, const rocprofiler_record_header_t** next,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
if (!Memory::GetNextRecord(record, next))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED);
@@ -311,7 +307,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_session(
rocprofiler_replay_mode_t replay_mode, rocprofiler_session_id_t* session_id) {
API_INIT_CHECKER
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode);
*session_id = rocprofiler::ROCProfiler_Singleton::GetInstance().CreateSession(replay_mode);
API_METHOD_SUFFIX
}
@@ -322,15 +318,16 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
API_INIT_CHECKER
// TODO(aelwazir): CheckFilterData to be implemented
// int error_code =
// rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind,
// rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind,
// filter_data);
// if (error_code == -1) throw
// rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0)
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
*filter_id = rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
*filter_id = rocprofiler_singleton
.GetSession(session_id)
->CreateFilter(filter_kind, filter_data, data_count, property);
API_METHOD_SUFFIX
}
@@ -338,11 +335,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_filter(rocprofiler_session_id_t session_id,
rocprofiler_filter_id_t filter_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyFilter(filter_id);
rocprofiler_singleton.GetSession(session_id)->DestroyFilter(filter_id);
API_METHOD_SUFFIX
}
@@ -350,10 +348,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_buffer(
rocprofiler_session_id_t session_id, rocprofiler_buffer_callback_t buffer_callback,
size_t buffer_size, rocprofiler_buffer_id_t* buffer_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
*buffer_id = rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
*buffer_id = rocprofiler_singleton
.GetSession(session_id)
->CreateBuffer(buffer_callback, buffer_size);
API_METHOD_SUFFIX
}
@@ -362,12 +361,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id,
rocprofiler_buffer_property_t* buffer_properties, uint32_t buffer_properties_count) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
rocprofiler_singleton
.GetSession(session_id)
->GetBuffer(buffer_id)
->SetProperties(buffer_properties, buffer_properties_count);
API_METHOD_SUFFIX
@@ -376,11 +376,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_buffer(rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyBuffer(buffer_id);
rocprofiler_singleton.GetSession(session_id)->DestroyBuffer(buffer_id);
API_METHOD_SUFFIX
}
@@ -388,20 +389,18 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_filter_buffer(
rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
rocprofiler_buffer_id_t buffer_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
if (!rocprofiler_singleton
.GetSession(session_id)
->CheckFilterBufferSize(filter_id, buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetFilter(filter_id)
->SetBufferId(buffer_id);
rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetBufferId(buffer_id);
API_METHOD_SUFFIX
}
@@ -409,19 +408,15 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_api_trace_sync_callback(
rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
rocprofiler_sync_callback_t callback) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
if (rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetFilter(filter_id)
->GetKind() != ROCPROFILER_API_TRACE)
if (rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->GetKind() !=
ROCPROFILER_API_TRACE)
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED);
rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetFilter(filter_id)
->SetCallback(callback);
rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetCallback(callback);
API_METHOD_SUFFIX
}
@@ -433,30 +428,31 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
API_INIT_CHECKER
// TODO(aelwazir): CheckFilterData to be implemented
// int error_code =
// rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind,
// rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind,
// filter_data);
// if (error_code == -1) throw
// rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0)
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode);
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
*session_id = rocprofiler_singleton.CreateSession(replay_mode);
rocprofiler_filter_id_t filter_id =
rocprofiler::GetROCProfilerSingleton()
->GetSession(*session_id)
rocprofiler_singleton
.GetSession(*session_id)
->CreateFilter(filter_kind, filter_data, data_count, property);
rocprofiler_buffer_id_t buffer_id = rocprofiler::GetROCProfilerSingleton()
->GetSession(*session_id)
->CreateBuffer(buffer_callback, buffer_size);
rocprofiler_buffer_id_t buffer_id = rocprofiler_singleton
.GetSession(*session_id)
->CreateBuffer(buffer_callback, buffer_size);
if (filter_kind == ROCPROFILER_API_TRACE)
rocprofiler::GetROCProfilerSingleton()
->GetSession(*session_id)
rocprofiler_singleton
.GetSession(*session_id)
->GetFilter(filter_id)
->SetCallback(callback);
if (!rocprofiler::GetROCProfilerSingleton()
->GetSession(*session_id)
if (!rocprofiler_singleton
.GetSession(*session_id)
->CheckFilterBufferSize(filter_id, buffer_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
rocprofiler::GetROCProfilerSingleton()
->GetSession(*session_id)
rocprofiler_singleton
.GetSession(*session_id)
->GetFilter(filter_id)
->SetBufferId(buffer_id);
API_METHOD_SUFFIX
@@ -466,9 +462,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
ROCPROFILER_API rocprofiler_status_t
rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
rocprofiler::GetROCProfilerSingleton()->DestroySession(session_id);
rocprofiler_singleton.DestroySession(session_id);
API_METHOD_SUFFIX
}
@@ -476,16 +473,17 @@ rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
ROCPROFILER_API rocprofiler_status_t
rocprofiler_start_session(rocprofiler_session_id_t session_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasFilter())
if (!rocprofiler_singleton.GetSession(session_id)->HasFilter())
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER);
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasBuffer())
if (!rocprofiler_singleton.GetSession(session_id)->HasBuffer())
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER);
if (rocprofiler::GetROCProfilerSingleton()->HasActiveSession())
if (rocprofiler_singleton.HasActiveSession())
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION);
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Start();
rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(session_id);
rocprofiler_singleton.GetSession(session_id)->Start();
rocprofiler_singleton.SetCurrentActiveSession(session_id);
API_METHOD_SUFFIX
}
@@ -493,12 +491,13 @@ rocprofiler_start_session(rocprofiler_session_id_t session_id) {
ROCPROFILER_API rocprofiler_status_t
rocprofiler_terminate_session(rocprofiler_session_id_t session_id) {
API_INIT_CHECKER
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (!rocprofiler_singleton.FindSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
if (!rocprofiler::GetROCProfilerSingleton()->IsActiveSession(session_id))
if (!rocprofiler_singleton.IsActiveSession(session_id))
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE);
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Terminate();
rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(rocprofiler_session_id_t{0});
rocprofiler_singleton.GetSession(session_id)->Terminate();
rocprofiler_singleton.SetCurrentActiveSession(rocprofiler_session_id_t{0});
API_METHOD_SUFFIX
}
@@ -507,8 +506,8 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create
int cpu_index, int gpu_index) {
API_METHOD_PREFIX
std::vector<std::string> counters(counter_names, counter_names + num_counters);
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateDeviceProfilingSession(
counters, cpu_index, gpu_index);
*session_id =
rocprofiler::ROCProfiler_Singleton::GetInstance().CreateDeviceProfilingSession(counters, cpu_index, gpu_index);
API_METHOD_SUFFIX
}
@@ -516,7 +515,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) {
API_METHOD_PREFIX
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StartSession();
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StartSession();
API_METHOD_SUFFIX
}
@@ -524,7 +523,7 @@ rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id)
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
rocprofiler_session_id_t session_id, rocprofiler_device_profile_metric_t* data) {
API_METHOD_PREFIX
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->PollMetrics(data);
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->PollMetrics(data);
API_METHOD_SUFFIX
}
@@ -532,7 +531,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
API_METHOD_PREFIX
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StopSession();
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StopSession();
API_METHOD_SUFFIX
}
@@ -540,7 +539,7 @@ rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) {
API_METHOD_PREFIX
rocprofiler::GetROCProfilerSingleton()->DestroyDeviceProfilingSession(session_id);
rocprofiler::ROCProfiler_Singleton::GetInstance().DestroyDeviceProfilingSession(session_id);
API_METHOD_SUFFIX
}
@@ -563,7 +562,7 @@ rocprofiler_codeobj_capture_create(
uint64_t userdata
) {
API_METHOD_PREFIX
id->handle = rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId();
id->handle = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
codeobj_record::make_capture(*id, mode, userdata);
API_METHOD_SUFFIX
}
@@ -618,7 +617,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
if (started) rocprofiler::fatal("HSA Tool started already!");
started = true;
rocprofiler::hsa_support::Initialize(table);
rocprofiler::HSASupport_Singleton::GetInstance().HSAInitialize(table);
return true;
}
@@ -627,7 +626,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
*/
ROCPROFILER_EXPORT void OnUnload() {
if (!started) rocprofiler::fatal("HSA Tool hasn't started yet!");
rocprofiler::hsa_support::Finalize();
rocprofiler::HSASupport_Singleton::GetInstance().HSAFinalize();
started = false;
}
+10 -6
Просмотреть файл
@@ -1,6 +1,6 @@
#include "eval_metrics.h"
#include "src/utils/helper.h"
#include "src/core/hsa/hsa_common.h"
#include "src/core/hsa/hsa_support.h"
#include <set>
#include <math.h>
@@ -93,16 +93,20 @@ bool metrics::ExtractMetricEvents(
results_list holds the result objects for each event (which means, basic counters only)
*/
try {
uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount();
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
uint32_t xcc_count = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount();
for (size_t i = 0; i < metric_names.size(); i++) {
counters_vec_t counters_vec;
// TODO: saurabh
// const Metric* metric = metrics_dict->GetMetricByName(metric_names[i]);
const Metric* metric = metrics_dict->Get(metric_names[i]);
if (metric == nullptr) {
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
fatal("input metric '%s' not supported on this hardware: %s ", metric_names[i].c_str(),
agentInfo.getName().data());
HSAAgentInfo& agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle);
fatal("input metric'%s' not supported on this hardware: %s ", metric_names[i].c_str(),
agentInfo.GetDeviceInfo().getName().data());
}
// adding result object for derived metric
@@ -176,7 +180,7 @@ bool metrics::ExtractMetricEvents(
bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent,
std::vector<results_t*>& results_list) {
uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount();
uint32_t xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount();
uint32_t single_xcc_buff_size = profile->output_buffer.size / (sizeof(uint64_t) * xcc_count);
callback_data_t callback_data{&results_list, 0, single_xcc_buff_size};
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data);
+14 -12
Просмотреть файл
@@ -44,6 +44,8 @@ THE SOFTWARE.
#include <mutex>
#include <unordered_set>
#include "src/core/hardware/hsa_info.h"
#include "src/core/hsa/hsa_support.h"
namespace fs = std::experimental::filesystem;
namespace rocprofiler {
@@ -121,10 +123,10 @@ class MetricsDict {
const cache_t* const cache_;
};
static MetricsDict* Create(const Agent::AgentInfo* agent_info) {
static MetricsDict* Create(const rocprofiler::HSAAgentInfo* agent_info) {
std::lock_guard<mutex_t> lck(mutex_);
if (map_ == NULL) map_ = new map_t;
std::string name = agent_info->getGfxip();
std::string name = agent_info->GetDeviceInfo().getGfxip();
auto ret = map_->insert({name, NULL});
if (ret.second) ret.first->second = new MetricsDict(agent_info);
return ret.first->second;
@@ -195,7 +197,7 @@ class MetricsDict {
return (xml_ != NULL) ? xml_->GetNodes("top." + scope + ".metric") : xml::Xml::nodes_t();
}
MetricsDict(const Agent::AgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) {
MetricsDict(const rocprofiler::HSAAgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) {
std::string xml_name = []() {
if (const char* path = getenv("ROCPROFILER_METRICS_PATH"); path != nullptr) return path;
return "";
@@ -208,14 +210,14 @@ class MetricsDict {
}
xml_ = xml::Xml::Create(xml_name);
if (xml_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metrics .xml open error '" << xml_name << "'");
xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->getMaxQueueSize());
xml_->AddConst("top.const.metric", "CU_NUM", agent_info->getCUCount());
xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->GetDeviceInfo().getMaxQueueSize());
xml_->AddConst("top.const.metric", "CU_NUM", agent_info->GetDeviceInfo().getCUCount());
xml_->AddConst("top.const.metric", "SIMD_NUM",
agent_info->getSimdCountPerCU() * agent_info->getCUCount());
xml_->AddConst("top.const.metric", "SE_NUM", agent_info->getShaderEngineCount());
agent_info->GetDeviceInfo().getSimdCountPerCU() * agent_info->GetDeviceInfo().getCUCount());
xml_->AddConst("top.const.metric", "SE_NUM", agent_info->GetDeviceInfo().getShaderEngineCount());
xml_->AddConst("top.const.metric", "LDS_BANKS", 32);
ImportMetrics(agent_info, "const");
agent_name_ = agent_info->getName();
agent_name_ = agent_info->GetDeviceInfo().getName();
if (agent_name_.find(':') != std::string::npos) // Remove compiler flags from the agent_name
agent_name_ = agent_name_.substr(0, agent_name_.find(':'));
@@ -233,7 +235,7 @@ class MetricsDict {
if (supported_agent_names.find(agent_name_) != supported_agent_names.end()) {
ImportMetrics(agent_info, agent_name_);
} else {
agent_name_ = agent_info->getGfxip();
agent_name_ = agent_info->GetDeviceInfo().getGfxip();
ImportMetrics(agent_info, agent_name_);
}
ImportMetrics(agent_info, "global");
@@ -244,7 +246,7 @@ class MetricsDict {
for (auto& entry : cache_) delete entry.second;
}
static hsa_ven_amd_aqlprofile_id_query_t Translate(const Agent::AgentInfo* agent_info,
static hsa_ven_amd_aqlprofile_id_query_t Translate(const rocprofiler::HSAAgentInfo* agent_info,
const std::string& block_name) {
hsa_ven_amd_aqlprofile_profile_t profile{};
profile.agent = hsa_agent_t{agent_info->getHandle()};
@@ -256,7 +258,7 @@ class MetricsDict {
return query;
}
void ImportMetrics(const Agent::AgentInfo* agent_info, const std::string& scope) {
void ImportMetrics(const rocprofiler::HSAAgentInfo* agent_info, const std::string& scope) {
auto arr = xml_->GetNodes("top." + scope + ".metric");
xml::Xml::node_list_t metrics_list(arr.begin(), arr.end());
uint32_t metrics_number = metrics_list.size();
@@ -380,7 +382,7 @@ class MetricsDict {
}
xml::Xml* xml_;
const Agent::AgentInfo* agent_info_;
const rocprofiler::HSAAgentInfo* agent_info_;
std::string agent_name_;
cache_t cache_;
+2 -1
Просмотреть файл
@@ -1,6 +1,7 @@
#include "df_counters_mi200.h"
#include "df_perfmon_registers_mi200.h"
#include "mmio.h"
#include "src/core/hsa/hsa_support.h"
namespace rocprofiler {
@@ -15,7 +16,7 @@ namespace rocprofiler {
/* get ficaa value for accessing CakeDlwmActiveTransferCount */
#define AMDGPU_PMU_SET_FICAA(o) ((o << 16) | 0x1AF5)
DFPerfMonMI200::DFPerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
DFPerfMonMI200::DFPerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) {
mmio_ = dynamic_cast<mmio::DFPerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::DF_PERFMON, info));
}
+1 -1
Просмотреть файл
@@ -14,7 +14,7 @@ namespace rocprofiler {
class DFPerfMonMI200 : public PerfMon {
public:
DFPerfMonMI200(const Agent::AgentInfo& info);
DFPerfMonMI200(const HSAAgentInfo& info);
~DFPerfMonMI200();
void Start() override;
void Stop(){};
+5 -5
Просмотреть файл
@@ -51,10 +51,10 @@ void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char*
#endif
}
MMIO::MMIO(const Agent::AgentInfo& info)
MMIO::MMIO(const HSAAgentInfo& info)
: agent_info_(&info), pci_memory_(nullptr), type_(DEFAULT_MMAP) {
const auto pci_domain = agent_info_->getPCIDomain();
const auto pci_location_id = agent_info_->getPCILocationID();
const auto pci_domain = agent_info_->GetDeviceInfo().getPCIDomain();
const auto pci_location_id = agent_info_->GetDeviceInfo().getPCILocationID();
pci_device_ =
pci_device_find_by_slot(pci_domain, pci_location_id >> 8, pci_location_id & 0xFF, 0);
@@ -123,7 +123,7 @@ bool MMIO::RegisterReadAPI(uint32_t reg_offset, uint32_t& value) {
}
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const HSAAgentInfo& info) {
MMIO* mmio = nullptr;
switch (type) {
case PCIE_PERFMON: {
@@ -152,7 +152,7 @@ MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
return mmio;
}
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info) {
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info) {
MMIO* mmio = nullptr;
auto it = mmio_instances_.find(info.getHandle());
if (it != mmio_instances_.end()) {
+8 -7
Просмотреть файл
@@ -23,6 +23,7 @@
#include <hsa/hsa.h>
#include "src/core/hardware/hsa_info.h"
#include "src/core/hsa/hsa_support.h"
#include <pciaccess.h>
#include <mutex>
@@ -67,17 +68,17 @@ class MMIO {
virtual ~MMIO();
friend class MMIOManager;
const Agent::AgentInfo& GetAgentInfo() { return *agent_info_; }
const HSAAgentInfo& GetAgentInfo() { return *agent_info_; }
mmap_type_t Type() { return type_; }
protected:
MMIO(const Agent::AgentInfo& info);
MMIO(const HSAAgentInfo& info);
// default constructor; helpful for derived classes
// which want to setup mmio construction differently
MMIO() { type_ = DEFAULT_MMAP; };
const Agent::AgentInfo* agent_info_;
const HSAAgentInfo* agent_info_;
struct pci_device* pci_device_;
size_t pci_memory_size_;
uint32_t* pci_memory_;
@@ -94,7 +95,7 @@ class PciePerfmonMMIO : public MMIO {
friend class MMIOManager;
protected:
PciePerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; };
PciePerfmonMMIO(const HSAAgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; };
};
// DFPerfmonMMIO has same mmio setup approach as
@@ -104,7 +105,7 @@ class DFPerfmonMMIO : public MMIO {
friend class MMIOManager;
protected:
DFPerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; };
DFPerfmonMMIO( const HSAAgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; };
};
/*
Class to manage mmio for UMC/DF/PCIe etc.
@@ -114,8 +115,8 @@ class DFPerfmonMMIO : public MMIO {
*/
class MMIOManager {
public:
static MMIO* CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info);
static MMIO* GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info);
static MMIO* CreateMMIO(mmap_type_t type, const HSAAgentInfo& info);
static MMIO* GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info);
static void DestroyMMIOInstance(MMIO* instance);
private:
+1 -1
Просмотреть файл
@@ -4,7 +4,7 @@
namespace rocprofiler {
PciePerfMonMI200::PciePerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
PciePerfMonMI200::PciePerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) {
mmio_ =
dynamic_cast<mmio::PciePerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::PCIE_PERFMON, info));
}
+1 -1
Просмотреть файл
@@ -13,7 +13,7 @@ namespace rocprofiler {
class PciePerfMonMI200 : public PerfMon {
public:
PciePerfMonMI200(const Agent::AgentInfo& info);
PciePerfMonMI200(const HSAAgentInfo& info);
~PciePerfMonMI200();
void SetCounterNames(std::vector<std::string>& counter_names) override;
void Start() override;
+110 -72
Просмотреть файл
@@ -19,9 +19,13 @@
THE SOFTWARE. */
#include "hsa_info.h"
#include <fstream>
#include <experimental/filesystem>
#include "src/utils/helper.h"
namespace fs = std::experimental::filesystem;
#define CHECK_STATUS(msg, status) \
do { \
if ((status) != HSA_STATUS_SUCCESS) { \
@@ -35,86 +39,120 @@
namespace Agent {
// AgentInfo Class
AgentInfo::AgentInfo() {}
AgentInfo::AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table) : handle_(agent.handle) {
if (table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
rocprofiler::fatal("hsa_agent_get_info failed");
char convert(uint32_t version) {
uint32_t diff = version - 10;
if (static_cast<char>('a' + diff) >= 'a' && static_cast<char>('a' + diff) <= 'z')
return static_cast<char>('a' + diff);
rocprofiler::fatal("Incorrect gpu version");
}
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_);
DeviceInfo::DeviceInfo(uint32_t topology_id, uint32_t gpu_id) {
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes/";
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
if (!fs::exists(sysfs_nodes_path))
rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
// Check the type of the device using gpu id
if (gpu_id == 0) assert("DeviceInfo does not support CPU");
numa_node_ = topology_id;
fs::path node_path = sysfs_nodes_path / std::to_string(topology_id);
if (!fs::exists(node_path)) rocprofiler::fatal("Could not opendir `%s'", node_path.c_str());
xcc_num_ = 1;
gpu_id_ = gpu_id;
fs::path properties_path = node_path / "properties";
std::ifstream props_ifs(properties_path);
uint32_t cu_per_simd_array = 0, array_count = 0;
uint32_t max_waves_per_simd = 0, gfx_target_version = 0;
std::string prop_name, minor_version_str, stepping_str;
uint64_t prop_value;
uint32_t major_version = 0, minor_version = 0, stepping = 0;
std::stringstream hex_minor_version;
max_wave_size_ = 0;
simds_per_cu_ = 0;
shader_arrays_per_se_ = 0;
se_num_ = 0;
waves_per_cu_ = 0;
cu_num_ = 0;
compute_units_per_sh_ = 0;
if (!props_ifs.is_open())
rocprofiler::fatal("Could not open %s/properties", properties_path.c_str());
while (props_ifs >> prop_name >> prop_value) {
if (prop_name == "wave_front_size") {
max_wave_size_ = static_cast<uint32_t>(prop_value);
if (max_wave_size_ <= 0) rocprofiler::fatal("Invalid max_wave_size_ in the topology file");
} else if (prop_name == "cu_per_simd_array") {
cu_per_simd_array = static_cast<uint32_t>(prop_value);
if (cu_per_simd_array <= 0)
rocprofiler::fatal("Invalid cu_per_simd_array in the topology file");
} else if (prop_name == "array_count") {
array_count = static_cast<uint32_t>(prop_value);
if (array_count <= 0) rocprofiler::fatal("Invalid array_count in the topology file");
} else if (prop_name == "simd_per_cu") {
simds_per_cu_ = static_cast<uint32_t>(prop_value);
if (simds_per_cu_ <= 0) rocprofiler::fatal("Invalid simd_per_cu in the topology file");
} else if (prop_name == "location_id")
pci_location_id_ = static_cast<uint32_t>(prop_value);
else if (prop_name == "domain")
pci_domain_ = static_cast<uint32_t>(prop_value);
else if (prop_name == "simd_arrays_per_engine") {
shader_arrays_per_se_ = static_cast<uint32_t>(prop_value);
if (shader_arrays_per_se_ <= 0)
rocprofiler::fatal("Invalid simd_arrays_per_engine in the topology file");
} else if (prop_name == "max_waves_per_simd") {
max_waves_per_simd = static_cast<uint32_t>(prop_value);
if (max_waves_per_simd <= 0)
rocprofiler::fatal("Invalid max_waves_per_simd in the topology file");
} else if (prop_name == "gfx_target_version")
gfx_target_version = static_cast<uint32_t>(prop_value);
else if (prop_name == "unique_id")
unique_gpu_id_ = static_cast<uint64_t>(prop_value);
else if (prop_name == "num_xcc")
xcc_num_ = static_cast<uint32_t>(prop_value);
}
se_num_ = array_count / shader_arrays_per_se_;
waves_per_cu_ = max_waves_per_simd * simds_per_cu_;
cu_num_ = cu_per_simd_array * array_count;
major_version = (gfx_target_version / 100) / 100;
std::string major_version_str = std::to_string(major_version);
minor_version = (gfx_target_version / 100) % 100;
if (minor_version > 9)
minor_version_str = std::string(1, convert(minor_version));
else
minor_version_str = std::to_string(minor_version);
stepping = (gfx_target_version % 100);
if (stepping > 9)
stepping_str = std::string(1, convert(stepping));
else
stepping_str = std::to_string(stepping);
std::string gpu_name = "gfx" + major_version_str + minor_version_str + stepping_str;
strcpy(name_, gpu_name.c_str());
compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_);
wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;
const int gfxip_label_len = std::min(strlen(name_) - 2, sizeof(gfxip_) - 1);
memcpy(gfxip_, name_, gfxip_label_len);
gfxip_[gfxip_label_len] = '\0';
if (type_ != HSA_DEVICE_TYPE_GPU) {
return;
}
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);
table->hsa_agent_get_info_fn(
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);
table->hsa_agent_get_info_fn(
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);
table->hsa_agent_get_info_fn(
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);
if (table->hsa_agent_get_info_fn(agent,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE,
&shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
&waves_per_cu_) != HSA_STATUS_SUCCESS) {
rocprofiler::fatal("hsa_agent_get_info for gfxip hardware configuration failed");
}
compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_);
wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;
if (table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_DOMAIN,
&pci_domain_) != HSA_STATUS_SUCCESS ||
table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID,
&pci_location_id_) != HSA_STATUS_SUCCESS) {
rocprofiler::fatal("hsa_agent_get_info for PCI info failed");
}
// TODO(saurabh, giovanni): Remove this in 5.7
if (table->hsa_agent_get_info_fn(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
&xcc_num_) != HSA_STATUS_SUCCESS) {
xcc_num_ = 1;
}
}
uint64_t AgentInfo::getIndex() const { return index_; }
hsa_device_type_t AgentInfo::getType() const { return type_; }
uint64_t AgentInfo::getHandle() const { return handle_; }
const std::string_view AgentInfo::getName() const { return name_; }
std::string AgentInfo::getGfxip() const { return std::string(gfxip_); }
uint32_t AgentInfo::getMaxWaveSize() const { return max_wave_size_; }
uint32_t AgentInfo::getMaxQueueSize() const { return max_queue_size_; }
uint32_t AgentInfo::getCUCount() const { return cu_num_; }
uint32_t AgentInfo::getSimdCountPerCU() const { return simds_per_cu_; }
uint32_t AgentInfo::getShaderEngineCount() const { return se_num_; }
uint32_t AgentInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; }
uint32_t AgentInfo::getMaxWavesPerCU() const { return waves_per_cu_; }
uint32_t AgentInfo::getCUCountPerSH() const { return compute_units_per_sh_; }
uint32_t AgentInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; }
uint32_t AgentInfo::getPCIDomain() const { return pci_domain_; }
uint32_t AgentInfo::getPCILocationID() const { return pci_location_id_; }
uint32_t AgentInfo::getXccCount() const { return xcc_num_; }
void AgentInfo::setIndex(uint64_t index) { index_ = index; }
void AgentInfo::setType(hsa_device_type_t type) { type_ = type; }
void AgentInfo::setHandle(uint64_t handle) { handle_ = handle; }
void AgentInfo::setName(const std::string& name) { strcpy(name_, name.c_str()); }
void AgentInfo::setNumaNode(uint32_t numa_node) { numa_node_ = numa_node; }
uint32_t AgentInfo::getNumaNode() { return numa_node_; }
void AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent) { near_cpu_agent_ = near_cpu_agent; }
hsa_agent_t AgentInfo::getNearCpuAgent() { return near_cpu_agent_; }
std::string_view DeviceInfo::getName() const { return name_; }
std::string DeviceInfo::getGfxip() const { return std::string(gfxip_); }
uint32_t DeviceInfo::getMaxWaveSize() const { return max_wave_size_; }
uint32_t DeviceInfo::getMaxQueueSize() const { return max_queue_size_; }
uint32_t DeviceInfo::getCUCount() const { return cu_num_; }
uint32_t DeviceInfo::getSimdCountPerCU() const { return simds_per_cu_; }
uint32_t DeviceInfo::getShaderEngineCount() const { return se_num_; }
uint32_t DeviceInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; }
uint32_t DeviceInfo::getMaxWavesPerCU() const { return waves_per_cu_; }
uint32_t DeviceInfo::getCUCountPerSH() const { return compute_units_per_sh_; }
uint32_t DeviceInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; }
uint32_t DeviceInfo::getPCIDomain() const { return pci_domain_; }
uint32_t DeviceInfo::getPCILocationID() const { return pci_location_id_; }
uint32_t DeviceInfo::getXccCount() const { return xcc_num_; }
uint64_t DeviceInfo::getUniqueGPUId() const { return unique_gpu_id_; }
uint32_t DeviceInfo::getNumaNode() const { return numa_node_; }
uint64_t DeviceInfo::getGPUId() const { return gpu_id_; }
// CounterHardwareInfo Class
+12 -31
Просмотреть файл
@@ -36,18 +36,15 @@ namespace Agent {
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
// XXX TODO: This should be merged into rocprofiler::hsa_support::AgentInfo and
// this file should be removed entirely, as it's completely redundant
class AgentInfo {
//DeviceInfo supports only GPU
class DeviceInfo {
public:
AgentInfo();
AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table);
uint64_t getIndex() const;
hsa_device_type_t getType() const;
uint64_t getHandle() const;
const std::string_view getName() const;
DeviceInfo() = default;
DeviceInfo(uint32_t topology_id, uint32_t gpu_id);
uint64_t getGPUId() const;
std::string_view getName() const;
std::string getGfxip() const;
uint32_t getMaxWaveSize() const;
uint32_t getMaxQueueSize() const;
@@ -61,26 +58,11 @@ class AgentInfo {
uint32_t getPCIDomain() const;
uint32_t getPCILocationID() const;
uint32_t getXccCount() const;
void setIndex(uint64_t index);
void setType(hsa_device_type_t type);
void setHandle(uint64_t handle);
void setName(const std::string& name);
void setNumaNode(uint32_t numa_node);
uint32_t getNumaNode();
void setNearCpuAgent(hsa_agent_t near_cpu_agent);
hsa_agent_t getNearCpuAgent();
hsa_amd_memory_pool_t cpu_pool;
hsa_amd_memory_pool_t kernarg_pool;
hsa_amd_memory_pool_t gpu_pool;
uint64_t getUniqueGPUId() const;
uint32_t getNumaNode() const;
private:
uint64_t index_;
hsa_device_type_t type_; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
uint64_t handle_;
char name_[64];
char gfxip_[64];
uint32_t max_wave_size_;
@@ -95,12 +77,11 @@ class AgentInfo {
uint32_t wave_slots_per_simd_;
// Number of XCCs on the GPU
uint32_t xcc_num_;
uint32_t pci_domain_;
uint32_t pci_location_id_;
uint64_t unique_gpu_id_;
uint32_t numa_node_;
hsa_agent_t near_cpu_agent_;
uint32_t gpu_id_;
};
// XXX TODO: This should be moved somewhere else so this file can be deleted
-123
Просмотреть файл
@@ -1,123 +0,0 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "hsa_common.h"
#include "src/utils/exception.h"
namespace rocprofiler {
namespace hsa_support {
std::mutex agents_map_lock;
std::map<decltype(hsa_agent_t::handle), Agent::AgentInfo> agent_info_map;
Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle) {
std::lock_guard<std::mutex> lock(agents_map_lock);
if (agent_info_map.find(handle) != agent_info_map.end()) {
return agent_info_map.at(handle);
} else {
std::cerr << std::string("Error: Can't find Agent with handle(") << std::to_string(handle)
<< ") in this system" << std::endl;
abort();
}
}
std::vector<hsa_agent_t> cpu_agents_list;
void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info) {
std::lock_guard<std::mutex> lock(agents_map_lock);
agent_info_map.emplace(handle, agent_info);
if (agent_info.getType() == HSA_DEVICE_TYPE_GPU) {
cpu_agents_list.emplace_back(hsa_agent_t{handle});
}
}
std::vector<hsa_agent_t>& GetCPUAgentList() { return cpu_agents_list; }
hsa_agent_t GetAgentByIndex(uint64_t agent_index) {
std::lock_guard<std::mutex> lock(agents_map_lock);
for (auto& agent_info : agent_info_map) {
if (agent_info.second.getIndex() == agent_index) {
return hsa_agent_t{agent_info.second.getHandle()};
}
}
std::cerr << std::string("Error: Can't find Agent with Index(") << std::to_string(agent_index)
<< ") in this system" << std::endl;
abort();
}
CoreApiTable saved_core_api{};
CoreApiTable& GetCoreApiTable() { return saved_core_api; }
void SetCoreApiTable(const CoreApiTable& table) { saved_core_api = table; }
AmdExtTable saved_amd_ext_api{};
AmdExtTable GetAmdExtTable() { return saved_amd_ext_api; }
void SetAmdExtTable(AmdExtTable* table) { saved_amd_ext_api = *table; }
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi() { return hsa_loader_api; }
void SetHSALoaderApi() {
hsa_status_t status = saved_core_api.hsa_system_get_major_extension_table_fn(
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed");
}
void ResetMaps() {
if (hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
status != HSA_STATUS_SUCCESS)
assert(!"hsa_amd_profiling_async_copy_enable failed");
memset(&saved_core_api, '\0', sizeof(saved_core_api));
memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api));
memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api));
}
rocprofiler_timestamp_t GetCurrentTimestampNS() {
// If the HSA intercept is installed, then use the "original"
// 'hsa_system_get_info' function to avoid reporting calls for internal use
// of the HSA API by the tracer.
auto hsa_system_get_info_fn = saved_core_api.hsa_system_get_info_fn;
// If the HSA intercept is not installed, use the default
// 'hsa_system_get_info'.
if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info;
uint64_t sysclock;
if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
status == HSA_STATUS_ERROR_NOT_INITIALIZED)
return rocprofiler_timestamp_t{0};
else if (status != HSA_STATUS_SUCCESS)
assert(!"hsa_system_get_info failed");
static uint64_t sysclock_period = [&]() {
uint64_t sysclock_hz = 0;
if (hsa_status_t status =
hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
status != HSA_STATUS_SUCCESS)
assert(!"hsa_system_get_info failed");
return (uint64_t)1000000000 / sysclock_hz;
}();
return rocprofiler_timestamp_t{sysclock * sysclock_period};
}
} // namespace hsa_support
} // namespace rocprofiler
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+72 -12
Просмотреть файл
@@ -29,8 +29,10 @@
#include <atomic>
#include <string>
#include <unordered_map>
#include <memory>
#include "hsa_common.h"
#include "rocprofiler.h"
#include "src/core/hardware/hsa_info.h"
// HSA EVT data type
@@ -92,15 +94,78 @@ typedef struct {
namespace rocprofiler {
namespace hsa_support {
void Initialize(HsaApiTable* Table);
hsa_status_t hsa_iterate_agents_cb(hsa_agent_t agent, void* data);
void Finalize();
class HSAAgentInfo {
private:
hsa_agent_t agent_;
Agent::DeviceInfo device_info_;
hsa_agent_t near_cpu_agent_;
hsa_device_type_t type_;
bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
} // namespace hsa_support
public:
HSAAgentInfo(hsa_agent_t agent, hsa_device_type_t type) : agent_(agent), type_(type){};
uint64_t getHandle() const;
const Agent::DeviceInfo& GetDeviceInfo() const;
void SetNearCpuAgent(hsa_agent_t near_cpu_agent);
void SetDeviceInfo(Agent::DeviceInfo device_info);
hsa_agent_t GetNearCpuAgent() const;
hsa_device_type_t GetType() const;
hsa_amd_memory_pool_t cpu_pool_;
hsa_amd_memory_pool_t kernarg_pool_;
hsa_amd_memory_pool_t gpu_pool_;
};
struct queues_deleter {
queues_deleter() {};
queues_deleter(queues_deleter&) { };
void operator() (void * queue) const;
};
class HSASupport_Singleton {
private:
HSASupport_Singleton() {};
~HSASupport_Singleton() = delete;
CoreApiTable saved_core_api;
AmdExtTable saved_amd_ext_api;
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api;
std::mutex info_map_mutex_;
std::unordered_map<uint64_t, HSAAgentInfo> HSAagent_info_map_;
std::atomic<bool> ksymbols_flag{true};
std::atomic<bool> kernel_names_flag{true};
std::mutex queues_mutex_;
std::unordered_map<hsa_queue_t*, std::unique_ptr<void, queues_deleter&>> queues;
void SetCoreApiTable(CoreApiTable& table);
void SetAmdExtTable(AmdExtTable& table);
void SetHSALoaderApi();
public:
std::vector<hsa_agent_t> gpu_agents;
HSAAgentInfo& GetHSAAgentInfo(uint64_t agent_handle);
HSAAgentInfo& GetHSAAgentInfo(Agent::DeviceInfo device_info);
Agent::DeviceInfo& GetDeviceInfo(HSAAgentInfo* agent_info);
std::mutex kernel_names_map_lock;
std::map<std::string, std::vector<uint64_t>>* kernel_names;
std::mutex ksymbol_map_lock;
std::map<uint64_t, std::string>* ksymbols;
void SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info);
static HSASupport_Singleton& GetInstance();
CoreApiTable& GetCoreApiTable();
AmdExtTable& GetAmdExtTable();
hsa_ven_amd_loader_1_01_pfn_t& GetHSALoaderApi();
void AddQueue(hsa_queue_t* queue, std::unique_ptr<void, queues_deleter&>);
void RemoveQueue(hsa_queue_t* queue);
void HSAInitialize(HsaApiTable* Table);
void HSAFinalize();
void InitKsymbols();
void FinitKsymbols();
HSASupport_Singleton(const HSASupport_Singleton&) = delete;
HSASupport_Singleton& operator=(const HSASupport_Singleton&) = delete;
};
bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
} // namespace rocprofiler
#include "src/core/session/tracer/src/roctracer.h"
@@ -126,11 +191,6 @@ uint32_t GetApiCode(const char* str);
void RegisterTracerCallback(int (*function)(rocprofiler_tracer_activity_domain_t domain,
uint32_t operation_id, void* data));
rocprofiler_timestamp_t timestamp_ns();
void Initialize_roctracer(HsaApiTable* table);
} // namespace roctracer::hsa_support
#endif // SRC_CORE_HSA_HSA_SUPPORT_H_
+60 -47
Просмотреть файл
@@ -40,7 +40,6 @@
#include "src/core/counters/basic/basic_counter.h"
#include "src/utils/exception.h"
#include "src/utils/logger.h"
#include "src/core/hsa/hsa_common.h"
#include "src/core/counters/metrics/metrics.h"
#include "src/core/hardware/hsa_info.h"
@@ -83,13 +82,14 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool
if (nullptr == data) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info");
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info");
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
@@ -114,20 +114,22 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
return FindGlobalPool(pool, data, true);
}
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) {
void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
hsa_status_t status =
rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
cpu_agent, FindStandardPool, &(agent_info->cpu_pool));
hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
cpu_agent, FindStandardPool, &(agent_info->cpu_pool_));
CHECK_HSA_STATUS("Error: Command Buffer Pool is not initialized", status);
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool));
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool_));
CHECK_HSA_STATUS("Error: Output Buffer Pool is not initialized", status);
}
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) {
void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
hsa_status_t status =
hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, &(agent_info->gpu_pool));
hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, &(agent_info->gpu_pool_));
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status);
}
@@ -136,13 +138,18 @@ struct block_des_t {
uint32_t index;
};
std::map<uint32_t, rocprofiler::MetricsDict*> metricsDict;
static std::atomic<bool> counters_added{false};
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents) {
for (auto& gpu_agent : gpu_agents) {
std::map<uint32_t, rocprofiler::MetricsDict*> metricsDict;
void CheckPacketReqiurements() {
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
for (auto& gpu_agent : hsasupport_singleton.gpu_agents) {
// get the instance of MetricsDict
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
metricsDict[gpu_agent.handle] = rocprofiler::MetricsDict::Create(&agentInfo);
}
}
@@ -155,18 +162,18 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
std::vector<std::string>& counter_names, rocprofiler_session_id_t session_id,
bool is_spm) {
hsa_status_t status = HSA_STATUS_SUCCESS;
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
if (!counters_added.load(std::memory_order_acquire)) {
for (auto& name : counter_names) {
rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id)
->GetProfiler()
->AddCounterName(name);
if (rocprofiler_singleton.HasActiveSession()) {
rocprofiler_singleton.GetSession(session_id)->GetProfiler()->AddCounterName(name);
}
}
counters_added.exchange(true, std::memory_order_release);
}
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
std::map<std::string, rocprofiler::results_t*> results_map;
std::vector<rocprofiler::event_t> events_list;
std::vector<rocprofiler::results_t*> results_list;
@@ -330,8 +337,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
<< "Error: Command buffer given size is " << size << std::endl;
abort();
}
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.cpu_pool, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.cpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
if (status != HSA_STATUS_SUCCESS) {
profile->command_buffer.ptr = malloc(size);
/*numa_alloc_onnode(
@@ -344,11 +351,10 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
}
} else {
// Both the CPU and GPU can access the memory
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
ag_list_count, ag_list, NULL, profile->command_buffer.ptr);
CHECK_HSA_STATUS("Error: Allowing access to Command Buffer", status);
}
if (!is_spm) {
status = HSA_STATUS_ERROR;
size_t size = profile->output_buffer.size;
@@ -358,8 +364,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
<< "Error: Output buffer given size is " << size << std::endl;
abort();
}
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.kernarg_pool, size, 0, reinterpret_cast<void**>(&profile->output_buffer.ptr));
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.kernarg_pool_, size, 0, reinterpret_cast<void**>(&profile->output_buffer.ptr));
if (status != HSA_STATUS_SUCCESS) {
profile->output_buffer.ptr = malloc(size);
/*numa_alloc_onnode(
@@ -372,7 +378,7 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
abort();
}
} else {
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
ag_list_count, ag_list, NULL, profile->output_buffer.ptr);
CHECK_HSA_STATUS("Error: GPU Agent can't have output buffer access", status);
memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size);
@@ -420,29 +426,34 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
// Preparing an Getting the size of the command and output buffers
status = hsa_ven_amd_aqlprofile_start(profile, NULL);
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
size_t ag_list_count = 1;
hsa_agent_t ag_list[ag_list_count];
ag_list[0] = gpu_agent;
// Allocating Command Buffer
//FixMe: Command buffer and output buffers are allocated repetatively.
status = HSA_STATUS_ERROR;
size_t size = profile->command_buffer.size;
profile->command_buffer.ptr = nullptr;
if (size <= 0) return nullptr;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.cpu_pool, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.cpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
// Both the CPU and GPU can access the memory
if (status == HSA_STATUS_SUCCESS) {
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
ag_list_count, ag_list, NULL, profile->command_buffer.ptr);
CHECK_HSA_STATUS("Error: GPU Agent can't have command buffer access", status);
} else {
hsa_agent_t near_cpu_node = agentInfo.GetNearCpuAgent();
uint32_t near_cpu_node_id = 0;
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(near_cpu_node,
HSA_AGENT_INFO_NODE, &near_cpu_node_id);
profile->command_buffer.ptr = numa_alloc_onnode(
profile->command_buffer.size,
rocprofiler::hsa_support::GetAgentInfo(agentInfo.getNearCpuAgent().handle).getNumaNode());
near_cpu_node_id);
if (profile->command_buffer.ptr != nullptr) {
status = HSA_STATUS_SUCCESS;
} else {
@@ -455,12 +466,12 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
size = profile->output_buffer.size;
profile->output_buffer.ptr = nullptr;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.gpu_pool, size, 0, reinterpret_cast<void**>(&(profile->output_buffer.ptr)));
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
agentInfo.gpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->output_buffer.ptr)));
CHECK_HSA_STATUS("Error: Can't Allocate Output Buffer", status);
// Both the CPU and GPU can access the kernel arguments
if (status == HSA_STATUS_SUCCESS) {
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
ag_list_count, ag_list, NULL, profile->output_buffer.ptr);
CHECK_HSA_STATUS("Error: Can't allow access on the Output Buffer for the GPU", status);
memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size);
@@ -490,11 +501,12 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
*cpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
// Both the CPU and GPU can access the memory
if (status == HSA_STATUS_SUCCESS) {
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
ag_list_count, ag_list, NULL, buffer);
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
@@ -504,32 +516,33 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
// Allocate memory for use by a kernel of specified size
uint8_t* AllocateLocalMemory(size_t size, hsa_amd_memory_pool_t* gpu_pool) {
hsa_status_t status = HSA_STATUS_ERROR;
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_amd_memory_pool_allocate(*gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(*gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile,
size_t att_buffer_size) {
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile, size_t att_buffer_size) {
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle);
profile->command_buffer.ptr =
AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool);
AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool_);
profile->output_buffer.size = att_buffer_size;
profile->output_buffer.ptr = (g_output_buffer_local)
? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool)
: AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool);
? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool_)
: AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool_);
return (profile->command_buffer.ptr && profile->output_buffer.ptr) ? HSA_STATUS_SUCCESS
: HSA_STATUS_ERROR;
}
bool AllocateMemoryPools(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
hsa_amd_memory_pool_t* cpu_pool, hsa_amd_memory_pool_t* gpu_pool) {
hsa_status_t status = hsa_amd_agent_iterate_memory_pools(cpu_agent, FindStandardPool, cpu_pool);
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(cpu_agent, FindStandardPool, cpu_pool);
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(cpu_pool)", status);
status = hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, gpu_pool);
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, gpu_pool);
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status);
return true;
+3 -4
Просмотреть файл
@@ -46,8 +46,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool);
void GetCommandBufferMap(std::map<size_t, uint8_t*>);
void GetOutputBufferMap(std::map<size_t, uint8_t*>);
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info);
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info);
void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info);
void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info);
hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_event_t* events,
uint32_t event_count, packet_t* start_packet, packet_t* stop_packet, packet_t* read_packet);
@@ -65,8 +65,7 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
void get_command_buffer_map(std::map<size_t, uint8_t*>);
void get_outbuffer_map(std::map<size_t, uint8_t*>);
void initialize_pools(hsa_agent_t cpu_agent);
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents);
void CheckPacketReqiurements();
typedef struct {
hsa_amd_memory_pool_t cpu_mem_pool;
+96 -151
Просмотреть файл
@@ -35,6 +35,7 @@
#include "src/utils/helper.h"
#include "src/core/isa_capture/code_object_track.hpp"
#define CHECK_HSA_STATUS(msg, status) \
do { \
if ((status) != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { \
@@ -74,37 +75,37 @@ static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1,
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> att_trace_callback_data_t;
static std::mutex ksymbol_map_lock;
static std::map<uint64_t, std::string>* ksymbols;
static std::atomic<bool> ksymbols_flag{true};
void AddKernelName(uint64_t handle, std::string name) {
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
ksymbols->emplace(handle, name);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
hsasupport_singleton.ksymbols->emplace(handle, name);
}
void RemoveKernelName(uint64_t handle) {
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
ksymbols->erase(handle);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
hsasupport_singleton.ksymbols->erase(handle);
}
std::string GetKernelNameFromKsymbols(uint64_t handle) {
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
if (ksymbols->find(handle) != ksymbols->end())
return ksymbols->at(handle);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
if (hsasupport_singleton.ksymbols->find(handle) != hsasupport_singleton.ksymbols->end())
return hsasupport_singleton.ksymbols->at(handle);
else
return "Unknown Kernel!";
}
static std::mutex kernel_names_map_lock;
static std::map<std::string, std::vector<uint64_t>>* kernel_names;
static std::atomic<bool> kernel_names_flag{true};
void AddKernelNameWithDispatchID(std::string name, uint64_t id) {
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
if (kernel_names->find(name) == kernel_names->end())
kernel_names->emplace(name, std::vector<uint64_t>());
kernel_names->at(name).push_back(id);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.kernel_names_map_lock);
if (hsasupport_singleton.kernel_names->find(name) == hsasupport_singleton.kernel_names->end())
hsasupport_singleton.kernel_names->emplace(name, std::vector<uint64_t>());
hsasupport_singleton.kernel_names->at(name).push_back(id);
}
std::string GetKernelNameUsingDispatchID(uint64_t given_id) {
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
for (auto kernel_name : (*kernel_names)) {
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.kernel_names_map_lock);
for (auto kernel_name : (*hsasupport_singleton.kernel_names)) {
for (auto dispatch_id : kernel_name.second) {
if (dispatch_id == given_id) return kernel_name.first;
}
@@ -112,34 +113,6 @@ std::string GetKernelNameUsingDispatchID(uint64_t given_id) {
return "Unknown Kernel!";
}
void InitKsymbols() {
if (ksymbols_flag.load(std::memory_order_relaxed)) {
{
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
ksymbols = new std::map<uint64_t, std::string>();
ksymbols_flag.exchange(false, std::memory_order_release);
}
{
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
kernel_names = new std::map<std::string, std::vector<uint64_t>>();
kernel_names_flag.exchange(false, std::memory_order_release);
}
}
}
void FinitKsymbols() {
if (!ksymbols_flag.load(std::memory_order_relaxed)) {
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
ksymbols->clear();
delete ksymbols;
ksymbols_flag.exchange(true, std::memory_order_release);
}
if (!kernel_names_flag.load(std::memory_order_relaxed)) {
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
kernel_names->clear();
delete kernel_names;
kernel_names_flag.exchange(true, std::memory_order_release);
}
}
struct kernel_descriptor_t {
@@ -185,7 +158,8 @@ enum amd_kernel_code_property_t {
static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) {
const kernel_descriptor_t* kernel_code = NULL;
hsa_status_t status = hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_query_host_address(
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
hsa_status_t status = hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_query_host_address(
reinterpret_cast<const void*>(kernel_object), reinterpret_cast<const void**>(&kernel_code));
if (HSA_STATUS_SUCCESS != status) {
kernel_code = reinterpret_cast<kernel_descriptor_t*>(kernel_object);
@@ -193,8 +167,8 @@ static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) {
return kernel_code;
}
static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
const std::string_view& name = info.getName();
static uint32_t arch_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
std::string info_name(name.data(), name.size());
if (strcmp(name.data(), "gfx90a") == 0 || strncmp(name.data(), "gfx94", 5) == 0)
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc3,
@@ -210,23 +184,23 @@ static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_
? 8
: 4);
}
static uint32_t accum_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
const std::string_view& name = info.getName();
static uint32_t accum_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
std::string info_name(name.data(), name.size());
if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(info, kernel_code);
if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(name, kernel_code);
if (strcmp(info_name.c_str(), "gfx90a") == 0 || strncmp(info_name.c_str(), "gfx94", 5) == 0)
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
1) *
8 -
arch_vgpr_count(info, kernel_code);
arch_vgpr_count(name, kernel_code);
return 0;
}
static uint32_t sgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
static uint32_t sgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
// GFX10 and later always allocate 128 sgprs.
const std::string_view name = info.getName();
// TODO(srnagara): Recheck the extraction of gfxip from gpu name
const char* name_data = name.data();
const size_t gfxip_label_len = std::min(name.size() - 2, size_t{63});
@@ -259,10 +233,10 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet
kernel_properties_ptr.workgroup_size = (uint32_t)workgroup_size;
kernel_properties_ptr.lds_size = packet.group_segment_size;
kernel_properties_ptr.scratch_size = packet.private_segment_size;
Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(agent.handle);
kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info, *kernel_code);
kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info, *kernel_code);
kernel_properties_ptr.sgpr_count = sgpr_count(agent_info, *kernel_code);
HSAAgentInfo agent_info = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle);
kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
kernel_properties_ptr.sgpr_count = sgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
kernel_properties_ptr.wave_size =
AMD_HSA_BITS_GET(kernel_code->kernel_code_properties,
AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)
@@ -275,9 +249,7 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet
namespace queue {
using rocprofiler::GetROCProfilerSingleton;
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
pmc_callback_data_t* passed_data = reinterpret_cast<pmc_callback_data_t*>(data);
@@ -330,7 +302,7 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign
rocprofiler_record_counter_value_t{value}});
}
record->counters = counters;
rocprofiler::Session* session = GetROCProfilerSingleton()->GetSession(pending->session_id);
rocprofiler::Session* session = rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(pending->session_id);
void* initial_handle = const_cast<rocprofiler_record_counter_instance_t*>(record->counters);
if (session->FindBuffer(pending->buffer_id)) {
Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id);
@@ -347,7 +319,8 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign
void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent,
att_pending_signal_t& pending) {
Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(gpu_agent.handle);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
HSAAgentInfo agent_info = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
att_trace_callback_data_t data;
hsa_status_t status =
hsa_ven_amd_aqlprofile_iterate_data(pending.profile, attTraceDataCallback, &data);
@@ -373,11 +346,11 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent
void* buffer = NULL;
if (data_size != 0) {
// Allocate buffer on CPU to copy out trace data
buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool);
buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool_);
if (buffer == NULL) fatal("Trace data buffer allocation failed");
auto status = rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr,
data_size);
auto status =
hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size);
if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed");
record->shader_engine_data[se_index].buffer_ptr = buffer;
@@ -392,12 +365,13 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent
bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
auto queue_info_session = static_cast<queue_info_session_t*>(data);
if (!queue_info_session || !GetROCProfilerSingleton() ||
!GetROCProfilerSingleton()->GetSession(queue_info_session->session_id) ||
!GetROCProfilerSingleton()->GetSession(queue_info_session->session_id)->GetProfiler())
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
if (!queue_info_session ||
!rocprofiler_singleton.GetSession(queue_info_session->session_id) ||
!rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetProfiler())
return true;
rocprofiler::Session* session =
GetROCProfilerSingleton()->GetSession(queue_info_session->session_id);
rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id);
std::lock_guard<std::mutex> lock(session->GetSessionLock());
rocprofiler::profiler::Profiler* profiler = session->GetProfiler();
std::vector<pending_signal_t*> pending_signals = const_cast<std::vector<pending_signal_t*>&>(
@@ -407,10 +381,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
for (auto it = pending_signals.begin(); it != pending_signals.end();
it = pending_signals.erase(it)) {
auto& pending = *it;
if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal))
return true;
if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) return true;
hsa_amd_profiling_dispatch_time_t time;
hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
queue_info_session->agent, pending->original_signal, &time);
uint32_t record_count = 1;
bool is_individual_xcc_mode = false;
@@ -440,7 +413,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
record.correlation_id = rocprofiler_correlation_id_t{pending->correlation_id};
if (pending->session_id.handle == 0) {
pending->session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
pending->session_id = rocprofiler_singleton.GetCurrentSessionId();
}
if (pending->counters_count > 0) {
if (xcc_id == 0 && pending->context && pending->context->metrics_list.size() > 0 &&
@@ -456,7 +429,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
pending->context->metrics_list,
time.end - time.start);
AddRecordCounters(&record, pending);
} else {
}else {
if (session->FindBuffer(pending->buffer_id)) {
Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id);
buffer->AddRecord(record);
@@ -467,13 +440,12 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
// TODO(aelwazir): we need a better way of distributing events and free them
// if (pending->profile->output_buffer.ptr)
// numa_free(pending->profile->output_buffer.ptr, pending->profile->output_buffer.size);
hsa_status_t status =
rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
(pending->profile->output_buffer.ptr));
hsa_status_t status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
(pending->profile->output_buffer.ptr));
CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status);
// if (pending->profile->command_buffer.ptr)
// numa_free(pending->profile->command_buffer.ptr, pending->profile->command_buffer.size);
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
(pending->profile->command_buffer.ptr));
CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status);
delete pending->profile;
@@ -483,9 +455,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
delete pending->context;
}
if (pending->new_signal.handle)
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
if (queue_info_session->interrupt_signal.handle)
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal);
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal);
}
}
delete queue_info_session;
@@ -496,15 +468,13 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
auto queue_info_session = static_cast<queue_info_session_t*>(data);
if (!queue_info_session || !GetROCProfilerSingleton())
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
if (!queue_info_session ||
!rocprofiler_singleton.GetSession(queue_info_session->session_id) ||
!rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetAttTracer())
return true;
rocprofiler::Session* session =
GetROCProfilerSingleton()->GetSession(queue_info_session->session_id);
if (!session) return true;
std::lock_guard<std::mutex> lock(session->GetSessionLock());
rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id);
rocprofiler::att::AttTracer* att_tracer = session->GetAttTracer();
if (!session->GetAttTracer()) return true;
@@ -516,9 +486,8 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
for (auto it = pending_signals.begin(); it != pending_signals.end();
it = pending_signals.erase(it)) {
auto& pending = *it;
if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal))
return true;
std::lock_guard<std::mutex> lock(session->GetSessionLock());
if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) return true;
rocprofiler_record_att_tracer_t record{};
record.kernel_id = rocprofiler_kernel_id_t{pending.kernel_descriptor};
record.gpu_id = rocprofiler_agent_id_t{(uint64_t)queue_info_session->gpu_index};
@@ -540,7 +509,7 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
std::atomic_thread_fence(std::memory_order_release);
if (pending.session_id.handle == 0) {
pending.session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
pending.session_id = rocprofiler_singleton.GetCurrentSessionId();
}
if (session->FindBuffer(pending.buffer_id)) {
Memory::GenericBuffer* buffer = session->GetBuffer(pending.buffer_id);
@@ -549,10 +518,10 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
}
codeobj_record::free_capture(record.header.id);
hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
(pending.profile->output_buffer.ptr));
CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status);
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
(pending.profile->command_buffer.ptr));
CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status);
delete pending.profile;
@@ -580,20 +549,20 @@ void AddVendorSpecificPacket(const Packet::packet_t* packet,
}
void SignalAsyncHandler(const hsa_signal_t& signal, void* data) {
hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn(
hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn(
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, data);
CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler failed", status);
}
void signalAsyncHandlerATT(const hsa_signal_t& signal, void* data) {
hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn(
hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn(
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandlerATT, data);
CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler for ATT failed", status);
}
void CreateSignal(uint32_t attribute, hsa_signal_t* signal) {
hsa_status_t status =
hsa_support::GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
CHECK_HSA_STATUS("Error: hsa_amd_signal_create failed", status);
}
@@ -635,17 +604,16 @@ void ResetSessionID(rocprofiler_session_id_t id) { session_id = id; }
void CheckNeededProfileConfigs() {
rocprofiler_session_id_t internal_session_id;
if (GetROCProfilerSingleton())
// Getting Session ID
internal_session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
else
internal_session_id = {0};
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
internal_session_id = rocprofiler_singleton.GetCurrentSessionId();
if (session_id.handle == 0 || internal_session_id.handle != session_id.handle) {
session_id = internal_session_id;
// Getting Counters count from the Session
if (session_id.handle > 0 && GetROCProfilerSingleton()) {
session = GetROCProfilerSingleton()->GetSession(session_id);
if (session_id.handle > 0 ) {
session = rocprofiler_singleton.GetSession(session_id);
if (session && session->FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) {
rocprofiler_filter_id_t filter_id =
session->GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION);
@@ -690,9 +658,9 @@ std::pair<std::vector<bool>, bool> GetAllowedProfilesList(const void* packets, i
std::vector<bool> can_profile_packet;
bool b_can_profile_anypacket = false;
can_profile_packet.reserve(pkt_count);
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
assert(ksymbols);
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
assert(hsasupport_singleton.ksymbols);
uint32_t current_writer_id = WRITER_ID.load(std::memory_order_relaxed);
@@ -710,7 +678,7 @@ std::pair<std::vector<bool>, bool> GetAllowedProfilesList(const void* packets, i
for (auto id : kernel_profile_dispatch_ids) b_profile_this_object |= id == current_writer_id;
try {
// Can throw
const std::string& kernel_name = ksymbols->at(kdispatch.kernel_object);
const std::string& kernel_name = hsasupport_singleton.ksymbols->at(kdispatch.kernel_object);
// If no filters specified, auto profile this kernel
if (kernel_profile_names.size() == 0 && kernel_profile_dispatch_ids.size() == 0 &&
@@ -739,7 +707,7 @@ ProcessATTParams(
Packet::packet_t& start_packet,
Packet::packet_t& stop_packet,
Queue& queue_info,
Agent::AgentInfo& agentInfo
rocprofiler::HSAAgentInfo& agentInfo
) {
std::vector<hsa_ven_amd_aqlprofile_parameter_t> att_params;
int num_att_counters = 0;
@@ -805,7 +773,7 @@ ProcessATTParams(
* pointer to the packet. This packet is written into the queue by this
* interceptor by invoking the writer function.
*/
void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data,
void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data,
hsa_amd_queue_intercept_packet_writer writer) {
static const char* env_MAX_ATT_PROFILES = getenv("ROCPROFILER_MAX_ATT_PROFILES");
@@ -882,7 +850,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
rocprofiler_kernel_properties_t kernel_properties =
set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent());
if (session) {
uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId();
uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object),
record_id);
if (session_data_count > 0 && profile.second) {
@@ -936,19 +904,21 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
(reinterpret_cast<Packet::packet_t*>(&barrier));
transformed_packets.emplace_back(*pkt);
}
Agent::AgentInfo& agentInfo =
rocprofiler::hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle);
rocprofiler::HSAAgentInfo& agentInfo =
rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
// Creating Async Handler to be called every time the interrupt signal is
// marked complete
SignalAsyncHandler(
interrupt_signal,
new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot,
queue_info.GetQueueID(), writer_id, interrupt_signal,
agentInfo.getIndex(), agentInfo.getXccCount()});
new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, queue_info.GetQueueID(),
writer_id, interrupt_signal, agentInfo.GetDeviceInfo().getGPUId(),
agentInfo.GetDeviceInfo().getXccCount()});
ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_add(1, std::memory_order_relaxed);
// profile_id++;
// } while (replay_mode_count > 0 && profile_id < replay_mode_count); // Profiles loop end
}
/* Write the transformed packets to the hardware queue. */
writer(&transformed_packets[0], transformed_packets.size());
} else if (session_id_snapshot.handle > 0 && pkt_count > 0 && is_att_collection_mode && session &&
@@ -957,7 +927,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
// Getting Queue Data and Information
auto& queue_info = *static_cast<Queue*>(data);
std::lock_guard<std::mutex> lk(queue_info.qw_mutex);
Agent::AgentInfo agentInfo = hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle);
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
bool can_profile_anypacket = false;
std::vector<bool> can_profile_packet;
@@ -1021,7 +991,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
// list to be processed by the signal interrupt
rocprofiler_kernel_properties_t kernel_properties =
set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent());
uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId();
uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object),
record_id);
@@ -1080,31 +1050,13 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
/* Write the original packets to the hardware queue if no profiling session
* is active */
writer(packets, pkt_count);
}
}
Queue::Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue)
: cpu_agent_(cpu_agent), gpu_agent_(gpu_agent) {
[[maybe_unused]] hsa_status_t status =
hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_create_fn(
gpu_agent, size, type, callback, data, private_segment_size, group_segment_size,
&intercept_queue_);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_support::GetAmdExtTable().hsa_amd_profiling_set_profiler_enabled_fn(intercept_queue_,
true);
assert(status == HSA_STATUS_SUCCESS);
hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_register_fn(intercept_queue_,
WriteInterceptor, this);
assert(status == HSA_STATUS_SUCCESS);
*queue = intercept_queue_;
}
Queue::Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent, hsa_queue_t* queue)
: cpu_agent_(cpu_agent), gpu_agent_(gpu_agent), intercept_queue_(queue) { }
Queue::~Queue() {
while (ACTIVE_INTERRUPT_SIGNAL_COUNT.load(std::memory_order_acquire) > 0) {
@@ -1119,15 +1071,8 @@ hsa_agent_t Queue::GetCPUAgent() { return cpu_agent_; }
uint64_t Queue::GetQueueID() { return intercept_queue_->id; }
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) {
Packet::InitializePools(cpu_agent, agent_info);
}
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) {
Packet::InitializeGPUPool(gpu_agent, agent_info);
}
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents) {
Packet::CheckPacketReqiurements(gpu_agents);
}
void CheckPacketReqiurements() {
Packet::CheckPacketReqiurements();}
} // namespace queue
} // namespace rocprofiler
+7 -11
Просмотреть файл
@@ -39,8 +39,7 @@
namespace rocprofiler {
void InitKsymbols();
void FinitKsymbols();
void AddKernelName(uint64_t handle, std::string kernel_name);
void RemoveKernelName(uint64_t handle);
void AddKernelNameWithDispatchID(std::string name, uint64_t id);
@@ -52,12 +51,12 @@ namespace queue {
class Queue {
public:
Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent,
hsa_queue_t* queue);
~Queue();
static void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index,
void* data, hsa_amd_queue_intercept_packet_writer writer);
hsa_queue_t* GetCurrentInterceptQueue();
hsa_agent_t GetGPUAgent();
hsa_agent_t GetCPUAgent();
@@ -69,7 +68,6 @@ class Queue {
std::mutex mutex_;
hsa_agent_t cpu_agent_;
hsa_agent_t gpu_agent_;
hsa_queue_t* original_queue_;
hsa_queue_t* intercept_queue_;
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
@@ -88,12 +86,10 @@ struct queue_info_session_t {
void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_signal_t& pending);
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info);
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info);
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents);
void ResetSessionID(rocprofiler_session_id_t id = rocprofiler_session_id_t{0});
void CheckPacketReqiurements();
} // namespace queue
} // namespace rocprofiler
+2 -2
Просмотреть файл
@@ -77,7 +77,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6
uint64_t mem_size) {
std::lock_guard<std::mutex> lock(mutex);
codeobjs[addr] = std::make_shared<codeobj_capture_instance>(
addr, URI, mem_addr, mem_size, rocprofiler::GetCurrentTimestamp().value);
addr, URI, mem_addr, mem_size, rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value);
std::atomic_thread_fence(std::memory_order_release); // Fencing the state of the map
{
std::lock_guard<std::mutex> lock(codeobj_record::mutex);
@@ -87,7 +87,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6
void codeobj_capture_instance::Unload(uint64_t addr) {
std::lock_guard<std::mutex> lock(mutex);
codeobjs.at(addr)->end_time = rocprofiler::GetCurrentTimestamp().value;
codeobjs.at(addr)->end_time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
codeobjs.erase(addr);
}
+3 -2
Просмотреть файл
@@ -69,8 +69,9 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
GenericBuffer::~GenericBuffer() {
if (is_valid_.load(std::memory_order_acquire)) {
std::lock_guard lock(buffer_lock_);
// if (rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_))
// rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)->DisableTools(id_);
//rocprofiler::ROCProfiler_Singleton& instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
//if (instance.GetSession(session_id_))
// instance.GetSession(session_id_)->DisableTools(id_);
Flush();
+12 -12
Просмотреть файл
@@ -35,17 +35,17 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
pci_system_initialized_(pci_system_init() == 0)
{
params_ = rocprofiler::GetROCProfilerSingleton()
->GetSession(session_id_)
params_ = rocprofiler::ROCProfiler_Singleton::GetInstance()
.GetSession(session_id_)
->GetFilter(filter_id_)
->GetCountersSamplerParameterData();
std::vector<hsa_agent_t> agents;
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* arg) {
auto& agents = *reinterpret_cast<std::vector<hsa_agent_t>*>(arg);
const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle);
if (ai.getType() == HSA_DEVICE_TYPE_GPU) {
const auto& ai = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle);
if (ai.GetType() == HSA_DEVICE_TYPE_GPU) {
agents.emplace_back(agent);
}
return HSA_STATUS_SUCCESS;
@@ -62,8 +62,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
}
if (pcie_counter_names.size() > 0) {
auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
if (agentInfo.getName() == "gfx90a") {
auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle);
if (agentInfo.GetDeviceInfo().getName()== "gfx90a") {
PciePerfMonMI200* perfmon = new PciePerfMonMI200(agentInfo);
perfmon->SetCounterNames(pcie_counter_names);
perfmon_instances_.push_back(perfmon);
@@ -77,8 +77,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
}
if (xgmi_counter_names.size() > 0) {
auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
if (agentInfo.getName() == "gfx90a") {
auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle);
if (agentInfo.GetDeviceInfo().getName() == "gfx90a") {
DFPerfMonMI200* perfmon = new DFPerfMonMI200(agentInfo);
perfmon->SetCounterNames(xgmi_counter_names);
perfmon_instances_.push_back(perfmon);
@@ -132,13 +132,13 @@ void CountersSampler::Stop() {
}
void CountersSampler::AddRecord(rocprofiler_record_counters_sampler_t& record) {
const auto tool = rocprofiler::GetROCProfilerSingleton();
const auto session = tool->GetSession(session_id_);
rocprofiler::ROCProfiler_Singleton& tool = rocprofiler::ROCProfiler_Singleton::GetInstance();
const auto session = tool.GetSession(session_id_);
const auto buffer = session->GetBuffer(buffer_id_);
std::lock_guard<std::mutex> lk(session->GetSessionLock());
record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool->GetUniqueRecordId()}};
record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool.GetUniqueRecordId()}};
// Add the record to the buffer(a deep-copy operation) along with
// a lambda function to deep-copy the record.counters member to
+3 -3
Просмотреть файл
@@ -9,7 +9,7 @@
#include "src/utils/exception.h"
#include "src/core/hsa/queues/queue.h"
// #include "src/core/counters/rdc/rdc_metrics.h"
#include "src/core/hsa/hsa_common.h"
#include <exception>
#include <typeinfo>
@@ -183,8 +183,8 @@ DeviceProfileSession::DeviceProfileSession(std::vector<std::string> profiling_da
if (hsa_agent_get_info(gpu_agent_, HSA_AGENT_INFO_NAME, gpu_name) != HSA_STATUS_SUCCESS)
fatal("Agent name query failed");
Agent::AgentInfo* agentInfo = &(hsa_support::GetAgentInfo(gpu_agent_.handle));
metrics_dict_ = MetricsDict::Create(agentInfo);
HSAAgentInfo agentInfo = (HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent_.handle));
metrics_dict_ = MetricsDict::Create(&agentInfo);
for (auto& d : profiling_data_) {
Metric* metric = const_cast<Metric*>(metrics_dict_->Get(d));
+11 -13
Просмотреть файл
@@ -157,7 +157,7 @@ std::mutex processQueueLock;
// std::vector<uint64_t> timestamp_vec;
// // Get Buffer
// rocprofiler::Session* session =
// rocprofiler::GetROCProfilerSingleton()->GetSession(rocprofiler::GetROCProfilerSingleton()->GetCurrentSessionId());
// rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(rocprofiler::rocmtool::GetInstance().GetCurrentSessionId());
// rocprofiler_filter_id_t filter_id = session->GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION);
// rocprofiler::Filter* filter = session->GetFilter(filter_id);
// rocprofiler_buffer_id_t buffer_id = filter->GetBufferId();
@@ -188,8 +188,7 @@ std::mutex processQueueLock;
// }
// se++;
// }
// record.header.id =
// rocprofiler_record_id_t{rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()};
// record.header.id = rocprofiler_record_id_t{rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId()};
// buffer->AddRecord(record);
// nSample++;
// index += 160;
@@ -241,11 +240,10 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) {
// advance command queue
const uint64_t write_idx =
rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue,
1);
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, 1);
while ((write_idx -
rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(
queue)) >= queue->size) {
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(queue)) >=
queue->size) {
sched_yield(); // TODO: remove
}
@@ -263,8 +261,8 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) {
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
// ringdoor bell
rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal,
write_idx);
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal,
write_idx);
return write_idx;
}
@@ -290,7 +288,7 @@ hsa_signal_value_t signalWait(const hsa_signal_t& signal, const hsa_signal_value
// Probably a maximum wait time should be set. We don't want application to hang because of
// unlimited wait.
// TODO2 : try 500000 assuming nanosecond granularity -- must be verified.
ret_value = rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_wait_scacquire_fn(
ret_value = rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_wait_scacquire_fn(
signal, HSA_SIGNAL_CONDITION_LT, signal_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
if (ret_value == exp_value) break;
@@ -321,9 +319,9 @@ spm::SpmCounters::SpmCounters(rocprofiler_buffer_id_t buffer_id, rocprofiler_fil
// create signals
hsa_status_t status =
hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_);
HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_);
if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed");
status = hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_);
status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_);
if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed");
is_started.store(false, std::memory_order_relaxed);
buffer_read_flag.store(false, std::memory_order_relaxed);
@@ -417,7 +415,7 @@ rocprofiler_status_t spm::SpmCounters::stopSpm() {
hsa_signal_store_screlease(stop_signal_, 1);
hsa_status_t status = HSA_STATUS_SUCCESS;
if (queue_ != nullptr) {
status = hsa_support::GetCoreApiTable().hsa_queue_destroy_fn(queue_);
status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_destroy_fn(queue_);
queue_ = nullptr;
}
if (status != HSA_STATUS_SUCCESS) rocprofiler::warning("Queue destroy failed");
+79 -88
Просмотреть файл
@@ -237,34 +237,33 @@ template <activity_domain_t domain> struct ApiTracer {
};
static void Exit(OperationId operation_id, TraceData* trace_data) {
if (rocprofiler::GetROCProfilerSingleton()) {
if (auto pool = activity_table.Get(operation_id)) {
if (rocprofiler::GetROCProfilerSingleton() &&
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton =
rocprofiler::ROCProfiler_Singleton::GetInstance();
if (auto pool = activity_table.Get(operation_id)) {
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)) {
if (rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
if (rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->IsValid()) {
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
std::lock_guard<std::mutex> lock(rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->GetBufferLock());
assert(trace_data != nullptr);
rocprofiler_record_tracer_t record{};
record.header = rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
record.domain = domain;
record.operation_id = rocprofiler_tracer_operation_id_t{operation_id};
record.correlation_id =
rocprofiler_tracer_activity_correlation_id_t{trace_data->api_data.correlation_id};
record.timestamps = rocprofiler_record_header_timestamp_t{
rocprofiler_timestamp_t{trace_data->phase_enter_timestamp},
hsa_support::timestamp_ns()};
rocprofiler_singleton.timestamp_ns()};
record.thread_id = rocprofiler_thread_id_t{GetTid()};
record.phase = ROCPROFILER_PHASE_NONE;
@@ -272,8 +271,7 @@ template <activity_domain_t domain> struct ApiTracer {
rocprofiler_record_tracer_t ext_record{};
ext_record.header = rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
ext_record.operation_id =
rocprofiler_tracer_operation_id_t{ACTIVITY_EXT_OP_EXTERN_ID};
@@ -283,21 +281,20 @@ template <activity_domain_t domain> struct ApiTracer {
ext_record.phase = ROCPROFILER_PHASE_NONE;
// Write the external correlation id record directly followed by the
// activity record.
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(std::array<rocprofiler_record_tracer_t, 2>{ext_record, record});
} else {
// Write record to the buffer.
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(record);
}
}
}
}
}
CorrelationIdPop();
}
@@ -332,7 +329,7 @@ template <activity_domain_t domain> struct ApiTracer {
trace_data->api_data.correlation_id = CorrelationIdPush();
if (activity_enabled) {
trace_data->phase_enter_timestamp = hsa_support::timestamp_ns().value;
trace_data->phase_enter_timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
trace_data->phase_enter = nullptr;
trace_data->phase_exit = Exit;
}
@@ -365,6 +362,7 @@ ActivityRegistrationTable<ACTIVITY_DOMAIN_HSA_OPS, IsStopped> hsa_ops_activity_t
CallbackRegistrationTable<ACTIVITY_DOMAIN_HSA_EVT, IsStopped> hsa_evt_callback_table;
int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) {
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
switch (domain) {
case ACTIVITY_DOMAIN_HSA_API:
return HSA_ApiTracer::Enter(static_cast<HSA_ApiTracer::OperationId>(operation_id),
@@ -380,50 +378,47 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
// and make the record point to it. Older HIP runtimes do not provide a kernel name,
// so record.kernel_name might be null.
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
if (rocprofiler::GetROCProfilerSingleton() &&
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)) {
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->GetBufferLock());
rocprofiler_record_tracer_t rocprofiler_record{};
rocprofiler_record.header = rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
rocprofiler_record.domain = domain;
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{};
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind};
rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{};
rocprofiler_record.correlation_id =
rocprofiler_tracer_activity_correlation_id_t{record->correlation_id};
rocprofiler_record.timestamps = rocprofiler_record_header_timestamp_t{
rocprofiler_timestamp_t{record->begin_ns}, rocprofiler_timestamp_t{record->end_ns}};
rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id};
rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id};
rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()};
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) {
rocprofiler_record.name = record->kernel_name;
size_t kernel_name_size = (strlen(record->kernel_name) + 1);
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size,
[](auto& rocprofiler_record, const void* data) {
rocprofiler_record.name = static_cast<const char*>(data);
});
} else {
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record);
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
rocprofiler_singleton.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)) {
std::lock_guard<std::mutex> lock(
rocprofiler_singleton.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->GetBufferLock());
rocprofiler_record_tracer_t rocprofiler_record{};
rocprofiler_record.header = rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
rocprofiler_record.domain = domain;
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{};
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind};
rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{};
rocprofiler_record.correlation_id =
rocprofiler_tracer_activity_correlation_id_t{record->correlation_id};
rocprofiler_record.timestamps =
rocprofiler_record_header_timestamp_t{rocprofiler_timestamp_t{record->begin_ns},
rocprofiler_timestamp_t{record->end_ns}};
rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id};
rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id};
rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()};
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) {
rocprofiler_record.name = record->kernel_name;
size_t kernel_name_size = (strlen(record->kernel_name) + 1);
rocprofiler_singleton.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size,
[](auto& rocprofiler_record, const void* data) {
rocprofiler_record.name = static_cast<const char*>(data);
});
} else {
rocprofiler_singleton.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record);
}
}
}
}
return 0;
}
@@ -438,19 +433,18 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
user_callback->second);
return 0;
} else {
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
if (rocprofiler::GetROCProfilerSingleton() &&
rocprofiler::GetROCProfilerSingleton()->GetSession(
if (
rocprofiler_singleton.GetSession(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id) &&
rocprofiler::GetROCProfilerSingleton()
->GetSession(
rocprofiler_singleton.GetSession(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
->GetBuffer(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)) {
if (auto api_data = static_cast<DomainTraits<ACTIVITY_DOMAIN_ROCTX>::ApiData*>(data)) {
std::lock_guard<std::mutex> lock(
rocprofiler::GetROCProfilerSingleton()
->GetSession(
rocprofiler_singleton.
GetSession(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
->GetBuffer(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)
@@ -460,13 +454,13 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{api_data ? api_data->args.id : 0},
ACTIVITY_DOMAIN_ROCTX,
rocprofiler_tracer_operation_id_t{operation_id},
tracer_api_data,
rocprofiler_tracer_activity_correlation_id_t{0},
rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(),
rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(),
rocprofiler_timestamp_t{0}},
0,
0,
@@ -477,8 +471,8 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
if (api_data && api_data->args.message) {
message_size = strlen(api_data->args.message) + 1;
}
rocprofiler::GetROCProfilerSingleton()
->GetSession(
rocprofiler_singleton
.GetSession(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
->GetBuffer(
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)
@@ -496,21 +490,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
case ACTIVITY_DOMAIN_HSA_OPS:
if (auto pool = hsa_ops_activity_table.Get(operation_id)) {
if (auto record = static_cast<activity_record_t*>(data)) {
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
if (rocprofiler::GetROCProfilerSingleton() &&
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
rocprofiler_singleton.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)) {
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
std::lock_guard<std::mutex> lock(rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->GetBufferLock());
rocprofiler_record_tracer_t rocprofiler_record{};
rocprofiler_record.header = rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
rocprofiler_record.domain = domain;
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{0};
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->op};
@@ -525,16 +515,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
if (record->kernel_name != nullptr && record->op == HSA_OP_ID_DISPATCH) {
size_t kernel_name_size = strlen(record->kernel_name) + 1;
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record, record->kernel_name, kernel_name_size,
[](auto& rocprofiler_record, const void* data) {
rocprofiler_record.name = static_cast<const char*>(data);
});
} else {
rocprofiler::GetROCProfilerSingleton()
->GetSession((*pool)->session_id)
rocprofiler_singleton
.GetSession((*pool)->session_id)
->GetBuffer((*pool)->buffer_id)
->AddRecord(rocprofiler_record);
}
+11 -14
Просмотреть файл
@@ -116,9 +116,10 @@ std::mutex& Tracer::GetTracerLock() { return tracer_lock_; }
void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_data, void* args) {
api_callback_data_t* args_data = reinterpret_cast<api_callback_data_t*>(args);
rocprofiler_tracer_api_data_t api_data{};
if (args_data && rocprofiler::GetROCProfilerSingleton() &&
rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id) &&
rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id)->GetTracer()) {
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
if (args_data &&
rocprofiler_singleton.GetSession(args_data->session_id) &&
rocprofiler_singleton.GetSession(args_data->session_id)->GetTracer()) {
switch (domain) {
case ACTIVITY_DOMAIN_ROCTX: {
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
@@ -127,12 +128,12 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
rocprofiler_record_tracer_t{
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{data ? data->args.id : 0}, ACTIVITY_DOMAIN_ROCTX,
rocprofiler_tracer_operation_id_t{cid}, api_data,
rocprofiler_tracer_activity_correlation_id_t{0},
rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(),
rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(),
rocprofiler_timestamp_t{0}},
0, 0, GetTid(), ROCPROFILER_PHASE_ENTER},
args_data->session_id);
@@ -147,8 +148,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
rocprofiler_record_tracer_t{
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API,
rocprofiler_tracer_operation_id_t{cid}, api_data,
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
@@ -161,8 +161,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
rocprofiler_record_tracer_t{
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API,
rocprofiler_tracer_operation_id_t{cid}, api_data,
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
@@ -182,8 +181,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
rocprofiler_record_tracer_t{
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API,
rocprofiler_tracer_operation_id_t{cid}, api_data,
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
@@ -196,8 +194,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
rocprofiler_record_tracer_t{
rocprofiler_record_header_t{
ROCPROFILER_TRACER_RECORD,
rocprofiler_record_id_t{
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API,
rocprofiler_tracer_operation_id_t{cid}, api_data,
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
+14 -14
Просмотреть файл
@@ -30,7 +30,6 @@
#include "src/api/rocprofiler_singleton.h"
#include "src/pcsampler/session/pc_sampler.h"
#include "src/pcsampler/gfxip/gfxip.h"
#include "src/core/hsa/hsa_common.h"
#include "src/core/hsa/hsa_support.h"
namespace rocprofiler::pc_sampler {
@@ -59,17 +58,18 @@ void PCSampler::Start() {
using agents_t = std::vector<hsa_agent_t>;
agents_t agents;
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* arg) {
auto& agents = *reinterpret_cast<agents_t*>(arg);
agents.emplace_back(agent);
return HSA_STATUS_SUCCESS;
},
&agents);
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
hsasupport_singleton.GetCoreApiTable().hsa_iterate_agents_fn(
[](hsa_agent_t agent, void *arg){
auto &agents = *reinterpret_cast<agents_t *>(arg);
agents.emplace_back(agent);
return HSA_STATUS_SUCCESS;
},
&agents);
for (const auto& agent : agents) {
const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle);
if (ai.getType() != HSA_DEVICE_TYPE_GPU) {
for (const auto &agent : agents) {
const auto& ai = hsasupport_singleton.GetHSAAgentInfo(agent.handle);
if (ai.GetType() != HSA_DEVICE_TYPE_GPU) {
continue;
}
devices_.emplace(agent.handle, gfxip::device_t{pci_system_initialized_, ai});
@@ -89,13 +89,13 @@ void PCSampler::Stop() {
}
void PCSampler::AddRecord(rocprofiler_record_pc_sample_t& record) {
const auto tool = rocprofiler::GetROCProfilerSingleton();
const auto session = tool->GetSession(session_id_);
rocprofiler::ROCProfiler_Singleton& rocprofiler_instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
const auto session = rocprofiler_instance.GetSession(session_id_);
const auto buffer = session->GetBuffer(buffer_id_);
std::lock_guard<std::mutex> lk(session->GetSessionLock());
record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {tool->GetUniqueRecordId()}};
record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {rocprofiler_instance.GetUniqueRecordId()}};
buffer->AddRecord(record);
}
+3 -3
Просмотреть файл
@@ -127,10 +127,10 @@ uint32_t debugfs_ioctl_read_register(const device_t& dev,
return value;
}
device_t::device_t(const bool pci_inited, const Agent::AgentInfo& info)
device_t::device_t(const bool pci_inited, const HSAAgentInfo& info)
: agent_info_(info), pci_memory_(nullptr) {
const auto pci_domain = agent_info_.getPCIDomain();
const auto pci_location_id = agent_info_.getPCILocationID();
const auto pci_domain = agent_info_.GetDeviceInfo().getPCIDomain();
const auto pci_location_id = agent_info_.GetDeviceInfo().getPCILocationID();
std::string name([pci_domain, pci_location_id]() {
std::ostringstream out;
+3 -2
Просмотреть файл
@@ -37,6 +37,7 @@
#include "rocprofiler.h"
#include "src/core/hardware/hsa_info.h"
#include "src/core/hsa/hsa_support.h"
#include "src/utils/handle.h"
#include <pciaccess.h>
@@ -99,14 +100,14 @@ static constexpr int HWIP_MAX_INSTANCE = 11;
(REG_FIELD_MASK(reg, field) & ((field_val) << REG_FIELD_SHIFT(reg, field))))
struct device_t {
device_t(const bool pci_inited, const Agent::AgentInfo& agent_info);
device_t(const bool pci_inited, const HSAAgentInfo& agent_info);
~device_t();
device_t(const device_t&) = delete;
device_t& operator=(const device_t&) = delete;
device_t(device_t&&) = default;
const Agent::AgentInfo& agent_info_;
const HSAAgentInfo& agent_info_;
struct pci_device* pci_device_;
size_t pci_memory_size_;
+12 -12
Просмотреть файл
@@ -104,8 +104,8 @@ void fill_record(const device_t& dev, rocprofiler_record_pc_sample_t* record, ui
* comment in rocprofiler::hsa_support::Initialize about using KFD's gpu_id for
* more information.
*/
record->pc_sample.gpu_id =
rocprofiler_agent_id_t{(uint64_t)rocprofiler::hsa_support::GetAgentInfo(hdl).getIndex()};
record->pc_sample.gpu_id = rocprofiler_agent_id_t{
HSASupport_Singleton::GetInstance().GetHSAAgentInfo(hdl).GetDeviceInfo().getGPUId()};
}
} // namespace
@@ -116,9 +116,9 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) {
uint32_t saved_grbm_gfx_index = dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)];
uint32_t data;
for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se)
for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh)
for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) {
for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se)
for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh)
for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) {
// Select the SE, SH, and CU.
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, cu);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh);
@@ -126,8 +126,8 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) {
dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)] = data;
// Iterate over all the waves in the compute unit.
for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd)
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) {
for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd)
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) {
// FatalHalt the wave
data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT);
data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE);
@@ -204,16 +204,16 @@ void read_pc_samples_v9_ioctl(const device_t& dev, PCSampler* sampler) {
uint32_t data;
for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se)
for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh)
for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) {
for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se)
for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh)
for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) {
ioc.grbm.se = se;
ioc.grbm.sh = sh;
ioc.grbm.instance = cu;
// Iterate over all the waves in the compute unit.
for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd)
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) {
for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd)
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) {
// FatalHalt the wave
data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT);
data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE);
+2 -1
Просмотреть файл
@@ -5,8 +5,9 @@ add_custom_target(
check
COMMAND ${PROJECT_BINARY_DIR}/run_tests.sh
DEPENDS tests)
add_subdirectory(unittests)
add_subdirectory(featuretests)
add_subdirectory(memorytests)
add_subdirectory(microbenchmarks)
add_subdirectory(HSAToolLibrary)
add_subdirectory(unittests)
configure_file(run_tests.sh ${PROJECT_BINARY_DIR} COPYONLY)
+52
Просмотреть файл
@@ -0,0 +1,52 @@
# ##############################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ##############################################################################
# Setup unit testing env
# Handle HSA Interception Tool Tests
find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH})
find_package(
Clang REQUIRED CONFI
PATHS "${ROCM_PATH}"
PATH_SUFFIXES "llvm/lib/cmake/clang")
file(GLOB TEST_HSATOOl_SRC_FILES ${PROJECT_SOURCE_DIR}/tests-v2/HSAToolLibrary/*.cpp)
add_library(test_hsatool_library SHARED ${TEST_HSATOOl_SRC_FILES})
target_include_directories(test_hsatool_library PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(
test_hsatool_library
PROPERTIES CXX_VISIBILITY_PRESET hidden
DEFINE_SYMBOL TEST_HSA_TOOL_EXPORTS
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/tests-v2
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
install(
TARGETS test_hsatool_library LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}/${ROCPROFILER_NAME}
COMPONENT tests)
target_link_libraries(test_hsatool_library PRIVATE hsa-runtime64::hsa-runtime64)
@@ -18,47 +18,41 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_CORE_HSA_HSA_COMMON_H_
#define SRC_CORE_HSA_HSA_COMMON_H_
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <map>
#include <mutex>
#include "rocprofiler.h"
#include "src/core/hardware/hsa_info.h"
#define ASSERTM(exp, msg) assert(((void)msg, exp))
namespace rocprofiler {
namespace hsa_support {
#include "HSATool.h"
extern "C" {
std::vector<hsa_agent_t>& GetCPUAgentList();
/*
@brief The HSA_AMD_TOOL_PRIORITY variable must be a constant value type
initialized by the loader itself, not by code during _init. 'extern const'
seems do that although that is not a guarantee.
*/
Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle);
void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info);
hsa_agent_t GetAgentByIndex(uint64_t agent_index);
TEST_HSA_TOOL_API extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
static rocprofiler_onload_callback rocprofiler_onload_callback_call = nullptr;
CoreApiTable& GetCoreApiTable();
void SetCoreApiTable(const CoreApiTable& table);
/*
AmdExtTable GetAmdExtTable();
void SetAmdExtTable(AmdExtTable* table);
@brief Callback function called upon loading the HSA.
The function updates the core api table function pointers to point to the
interceptor functions in this file.
hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi();
void SetHSALoaderApi();
*/
void ResetMaps();
TEST_HSA_TOOL_API bool OnLoad(void* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
rocprofiler_onload_callback_call(table, runtime_version, failed_tool_count, failed_tool_names);
return true;
}
rocprofiler_timestamp_t GetCurrentTimestampNS();
/*
@brief Callback function upon unloading the HSA.
*/
} // namespace hsa_support
} // namespace rocprofiler
TEST_HSA_TOOL_API void OnUnload() { printf("\n\nTool is getting unloaded\n\n"); }
#endif // SRC_CORE_HSA_HSA_COMMON_H_
} // extern "C"
TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback) {
rocprofiler_onload_callback_call = callback;
}
+60
Просмотреть файл
@@ -0,0 +1,60 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
/* Placeholder for calling convention and import/export macros */
#if !defined(TEST_HSA_TOOL_EXPORT_CALL)
#define TEST_HSA_TOOL_EXPORT_CALL
#endif /* !defined (TEST_HSA_TOOL_EXPORT_CALL) */
#if !defined(TEST_HSA_TOOL_EXPORT_DECORATOR)
#if defined(__GNUC__)
#define TEST_HSA_TOOL_EXPORT_DECORATOR __attribute__((visibility("default")))
#elif defined(_MSC_VER)
#define TEST_HSA_TOOL_EXPORT_DECORATOR __declspec(dllexport)
#endif /* defined (_MSC_VER) */
#endif /* !defined (TEST_HSA_TOOL_EXPORT_DECORATOR) */
#if !defined(TEST_HSA_TOOL_IMPORT_DECORATOR)
#if defined(__GNUC__)
#define TEST_HSA_TOOL_IMPORT_DECORATOR
#elif defined(_MSC_VER)
#define TEST_HSA_TOOL_IMPORT_DECORATOR __declspec(dllimport)
#endif /* defined (_MSC_VER) */
#endif /* !defined (TEST_HSA_TOOL_IMPORT_DECORATOR) */
#define TEST_HSA_TOOL_EXPORT TEST_HSA_TOOL_EXPORT_DECORATOR TEST_HSA_TOOL_EXPORT_CALL
#define TEST_HSA_TOOL_IMPORT TEST_HSA_TOOL_IMPORT_DECORATOR TEST_HSA_TOOL_IMPORT_CALL
#if defined(TEST_HSA_TOOL_EXPORTS)
#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT
#else /* !defined (TEST_HSA_TOOL_EXPORTS) */
#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT
#endif /* !defined (TEST_HSA_TOOL_EXPORTS) */
typedef int (*rocprofiler_onload_callback)(
void* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names);
TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback);
+6 -8
Просмотреть файл
@@ -1,17 +1,14 @@
#include <gtest/gtest.h>
#include "src/core/hardware/hsa_info.h"
//#include "src/core/hsa/hsa_common.h"
// Entry Point for Gtests Infra
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe";
// Add line below to disable any problematic test
hsa_init();
testing::GTEST_FLAG(filter) =
"-OpenMPTest.*:ProfilerSPMTest.*:ProfilerMQTest.*:ProfilerMPTest.*:MPITest.*";
// Disable ATT test fir gfx10 GPUs until its supported
hsa_init();
// iterate for gpu's
hsa_iterate_agents(
[](hsa_agent_t agent, void*) {
@@ -25,8 +22,9 @@ int main(int argc, char** argv) {
}
return HSA_STATUS_SUCCESS;
},
nullptr);
// hsa_shut_down(); // Waiting for hsa_shutdown bug to fix
// Append filter above to disable any problematic test
return RUN_ALL_TESTS();
nullptr);
// Append filter above to disable any problematic test
int res = RUN_ALL_TESTS();
hsa_shut_down();
return res;
}
+5 -1
Просмотреть файл
@@ -5,7 +5,11 @@ CURRENT_DIR="$( dirname -- "$0"; )";
echo -e "Running Profiler Tests"
echo -e "running unit tests for rocprofiler"
eval ${CURRENT_DIR}/tests-v2/unittests/runUnitTests
eval ${CURRENT_DIR}/tests-v2/unittests/core/runCoreUnitTests
echo -e "running unit tests for rocprofiler"
eval ${CURRENT_DIR}/tests-v2/unittests/profiler/runUnitTests
echo -e "running feature tests for rocprofiler"
eval ${CURRENT_DIR}/tests-v2/featuretests/profiler/runFeatureTests
+2 -110
Просмотреть файл
@@ -1,110 +1,2 @@
# Setup unit testing env
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
enable_testing()
find_package(GTest REQUIRED)
find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED)
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
file(GLOB ROCPROFILER_TRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
file(GLOB ROCPROFILER_SRC_CLASS_FILES
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
# Compiling gtests
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
add_executable(
runUnitTests
${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp
${CORE_MEMORY_SRC_FILES}
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_HW_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${ROCPROFILER_SPM_SRC_FILES}
${ROCPROFILER_SRC_FILES}
${CORE_HSA_SRC_FILES}
${CORE_HSA_PACKETS_SRC_FILES}
${CORE_COUNTERS_SRC_FILES}
${CORE_HSA_QUEUES_SRC_FILES}
${ROCPROFILER_TRACER_SRC_FILES}
${ROCPROFILER_ROCTRACER_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES})
target_include_directories(
runUnitTests
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
${PROJECT_BINARY_DIR}/rocprofiler)
target_compile_definitions(
runUnitTests
PUBLIC AMD_INTERNAL_BUILD
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl)
add_dependencies(tests runUnitTests)
install(TARGETS runUnitTests
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
COMPONENT tests)
add_test(AllTests runUnitTests)
add_subdirectory(profiler)
add_subdirectory(core)
+133
Просмотреть файл
@@ -0,0 +1,133 @@
# ##############################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ##############################################################################
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
enable_testing()
find_package(GTest REQUIRED)
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
file(GLOB ROCPROFILER_TRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
file(GLOB ROCPROFILER_SRC_CLASS_FILES
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
# Compiling gtests
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
set(GTEST_MAIN_DIR ${PROJECT_SOURCE_DIR}/tests-v2/unittests/core)
file(GLOB GTEST_MAIN_SRC_FILE ${GTEST_MAIN_DIR}/gtests_main.cpp)
add_executable(
runCoreUnitTests
${CORE_MEMORY_SRC_FILES}
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_HW_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${ROCPROFILER_SPM_SRC_FILES}
${ROCPROFILER_SRC_FILES}
${CORE_HSA_SRC_FILES}
${CORE_HSA_PACKETS_SRC_FILES}
${CORE_COUNTERS_SRC_FILES}
${CORE_HSA_QUEUES_SRC_FILES}
${ROCPROFILER_TRACER_SRC_FILES}
${ROCPROFILER_ROCTRACER_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES}
${GTEST_MAIN_SRC_FILE}
${CMAKE_CURRENT_SOURCE_DIR}/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/HSASingleton/HSASingleton_unittests.cpp
)
target_include_directories(
runCoreUnitTests
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
${PROJECT_BINARY_DIR}/rocprofiler)
target_compile_definitions(
runCoreUnitTests
PUBLIC AMD_INTERNAL_BUILD
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
runCoreUnitTests PRIVATE rocprofiler_tool test_hsatool_library ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES})
add_dependencies(tests runCoreUnitTests)
install(TARGETS runCoreUnitTests
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
COMPONENT tests)
add_test(AllTests runCoreUnitTests)
+139
Просмотреть файл
@@ -0,0 +1,139 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <gtest/gtest.h>
#include <vector>
#include <mutex>
#include <memory>
#include <stdlib.h>
#include "api/rocprofiler_singleton.h"
#include "src/core/hsa/hsa_support.h"
#define MAX_THREADS 10000
struct devices_t {
std::vector<hsa_agent_t> cpu_devices;
std::vector<hsa_agent_t> gpu_devices;
std::vector<hsa_agent_t> other_devices;
};
hsa_status_t device_cb_tool(hsa_agent_t agent, void* data) {
hsa_device_type_t device_type;
devices_t* devices = reinterpret_cast<devices_t*>(data);
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type) != HSA_STATUS_SUCCESS) {
std::cout << "hsa_iterate_agents failed" << std::endl;
std::exit(-1);
}
switch (device_type) {
case HSA_DEVICE_TYPE_CPU:
devices->cpu_devices.push_back(agent);
break;
case HSA_DEVICE_TYPE_GPU:
devices->gpu_devices.push_back(agent);
break;
default:
devices->other_devices.push_back(agent);
break;
}
return HSA_STATUS_SUCCESS;
}
void get_hsa_agents_list_tool(devices_t* device_list) {
// Enumerate the agents.
if (hsa_iterate_agents(device_cb_tool, device_list) != HSA_STATUS_SUCCESS) {
std::cout << "hsa_iterate_agents failed" << std::endl;
std::exit(-1);
}
}
class TestHSASupportSingleton {
public:
uint64_t ref_address;
TestHSASupportSingleton() {
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
rocprofiler::HSASupport_Singleton::GetInstance();
ref_address = reinterpret_cast<long int>(&hsasupport_singleton);
}
};
void instanciateHSASupportSingleton(int index, uint64_t *ref_array) {
TestHSASupportSingleton test;
*(ref_array+index) = test.ref_address;
}
//Add more threads here
TEST(WhenInvokingHSASingleton, HSASupportSingletonInstanciation) {
std::vector<std::thread> threads;
uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS));
for(int i = 0; i < MAX_THREADS; i++) {
threads.emplace_back(instanciateHSASupportSingleton, i, &refaddress[0]);
}
for (auto&& thread : threads) thread.join();
uint64_t ref_addr = refaddress[0];
for(int i = 1; i < MAX_THREADS; i++)
EXPECT_EQ(ref_addr, refaddress[i]) << "HSASingleton Instanciation failed";
}
TEST(WhenInvokingGetHSAInitialize, TestHSASupportSingleton) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
rocprofiler::HSASupport_Singleton::GetInstance();
devices_t device_list;
get_hsa_agents_list_tool(&device_list);
for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) {
[[maybe_unused]]rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
}
EXPECT_EQ(hsasupport_singleton.gpu_agents.size(), device_list.gpu_devices.size()) << "HSAInitialize failed";
}
TEST(WhenInvokingGetHSAAgentInfo, TestHSASupportSingleton) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
rocprofiler::HSASupport_Singleton::GetInstance();
devices_t device_list;
get_hsa_agents_list_tool(&device_list);
for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) {
rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
uint32_t gpu_id;
char name[64];
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(
*it, (hsa_agent_info_t)(HSA_AMD_AGENT_INFO_DRIVER_UID), &gpu_id);
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(*it, HSA_AGENT_INFO_NAME, name);
EXPECT_EQ(agent_info.GetDeviceInfo().getGPUId(), gpu_id) << "HSAAgentInfo has incorrect gpu id for the agent: " << it->handle;
EXPECT_EQ(strcmp(agent_info.GetDeviceInfo().getName().data(), name), 0) << "HSAAgentInfo has incorrect gpu name for the agent: " << it->handle;
}
}
TEST(WhenInvokingQueueInterceptors, TestQueueInterceptors) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
rocprofiler::HSASupport_Singleton::GetInstance();
hsa_queue_t* queue1 = nullptr, *queue2 = nullptr;
hsa_status_t status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, &queue1);
EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue create interceptor failed";
status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, &queue2);
status = hsa_queue_destroy(queue1);
EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue destroy interceptor failed";
}
@@ -0,0 +1,207 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <gtest/gtest.h>
#include <vector>
#include <mutex>
#include <memory>
#include <experimental/filesystem>
#include "api/rocprofiler_singleton.h"
#include "src/core/hsa/hsa_support.h"
namespace fs = std::experimental::filesystem;
using namespace std::string_literals;
#define MAX_THREADS 10000
TEST(WhenTestingDeviceInfo, TestFailFatal) {
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
fs::path node_path = dirp_entry.path();
long long node_id = std::stoll(std::string(dirp_entry.path().stem().string()));
fs::path gpu_path = node_path / "gpu_id";
std::ifstream gpu_id_file(gpu_path);
std::string gpu_id_str;
long long gpu_id = 0;
if (gpu_id_file.is_open()) {
gpu_id_file >> gpu_id_str;
if (!gpu_id_str.empty()) {
gpu_id = std::stoll(gpu_id_str);
if (gpu_id == 0) {
EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, gpu_id), "");
break;
}
}
}
}
}
TEST(WhenTestingDeviceInfo, DeviceInfoReadSuccessfully) {
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
uint64_t gpu_id = 0;
uint32_t wave_front_size = 0;
uint32_t location_id = 0;
uint32_t domain = 0;
uint32_t shader_arrays_per_se = 0;
[[maybe_unused]] uint32_t max_waves_per_simd = 0;
uint32_t array_count = 0;
uint32_t se_num = 0;
uint32_t cu_num = 0;
uint32_t cu_per_simd_array = 0;
uint32_t simd_per_cu = 0;
uint64_t unique_gpu_id = 0;
uint32_t xcc_num = 1;
uint32_t compute_units_per_sh = 0;
long long topology_id = 0;
uint32_t waves_per_cu = 0;
uint32_t wave_slots_per_simd = 0;
rocprofiler::ROCProfiler_Singleton& rocprofiler_instance =
rocprofiler::ROCProfiler_Singleton::GetInstance();
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
fs::path node_path = dirp_entry.path();
topology_id = std::stoll(dirp_entry.path().stem().string());
fs::path gpu_path = node_path / "gpu_id";
std::ifstream gpu_id_file(gpu_path.c_str());
std::string gpu_id_str;
if (gpu_id_file.is_open()) {
gpu_id_file >> gpu_id_str;
if (!gpu_id_str.empty()) {
gpu_id = std::stoll(gpu_id_str);
if (gpu_id > 0) {
const Agent::DeviceInfo& device_info = rocprofiler_instance.GetDeviceInfo(gpu_id);
fs::path properties_path = node_path / "properties";
std::ifstream props_ifs(properties_path);
if (!props_ifs.is_open())
rocprofiler::fatal("Could not open %s/properties", properties_path.c_str());
std::string prop_name;
uint64_t prop_value;
EXPECT_TRUE(gpu_id == device_info.getGPUId());
while (props_ifs >> prop_name >> prop_value) {
if (prop_name == "wave_front_size")
wave_front_size = static_cast<uint32_t>(prop_value);
else if (prop_name == "array_count")
array_count = static_cast<uint32_t>(prop_value);
else if (prop_name == "simd_per_cu")
simd_per_cu = static_cast<uint32_t>(prop_value);
else if (prop_name == "location_id")
location_id = static_cast<uint32_t>(prop_value);
else if (prop_name == "domain")
domain = static_cast<uint32_t>(prop_value);
else if (prop_name == "simd_arrays_per_engine")
shader_arrays_per_se = static_cast<uint32_t>(prop_value);
else if (prop_name == "max_waves_per_simd")
max_waves_per_simd = static_cast<uint32_t>(prop_value);
else if (prop_name == "cu_per_simd_array")
cu_per_simd_array = static_cast<uint32_t>(prop_value);
else if (prop_name == "unique_id")
unique_gpu_id = static_cast<uint64_t>(prop_value);
else if (prop_name == "num_xcc")
xcc_num = static_cast<uint32_t>(prop_value);
}
se_num = array_count / shader_arrays_per_se;
cu_num = cu_per_simd_array * array_count;
waves_per_cu = 8 * simd_per_cu;
compute_units_per_sh = cu_num / (se_num * shader_arrays_per_se);
wave_slots_per_simd = waves_per_cu / simd_per_cu;
EXPECT_TRUE(wave_front_size == device_info.getMaxWaveSize())
<< "Device Info has incorrect wave_front_size ";
EXPECT_TRUE(simd_per_cu == device_info.getSimdCountPerCU())
<< "Device Info has incorrect simd_per_cu";
EXPECT_TRUE(location_id == device_info.getPCILocationID())
<< "Device Info has incorrect location_id";
EXPECT_TRUE(se_num == device_info.getShaderEngineCount())
<< "Device Info has incorrect se_num";
EXPECT_TRUE(waves_per_cu == device_info.getMaxWavesPerCU())
<< "Device Info has incorrect waves_per_cu";
EXPECT_TRUE(domain == device_info.getPCIDomain()) << "Device Info has incorrect domain";
EXPECT_TRUE(shader_arrays_per_se == device_info.getShaderArraysPerSE())
<< "Device Info has incorrect shader_arrays_per_se";
EXPECT_TRUE(cu_num == device_info.getCUCount()) << "Device Info has incorrect cu_num";
EXPECT_TRUE(compute_units_per_sh == device_info.getCUCountPerSH())
<< "Device Info has incorrect compute_units_per_sh ";
EXPECT_TRUE(wave_slots_per_simd == device_info.getWaveSlotsPerSimd())
<< "Device Info has incorrect wave_slots_per_simd ";
EXPECT_TRUE(unique_gpu_id == device_info.getUniqueGPUId())
<< "Device Info has incorrect unique_gpu_id ";
EXPECT_TRUE(xcc_num == device_info.getXccCount()) << "Device Info has incorrect xcc_num ";
;
EXPECT_TRUE(static_cast<uint32_t>(topology_id) == device_info.getNumaNode())
<< "Device Info has incorrect topology_id";
}
}
}
}
}
TEST(WhenTestingDeviceInfo, GetDeviceInfoFail) {
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
uint64_t node_id = 0;
for ([[maybe_unused]] auto const& dirp_entry : fs::directory_iterator{dirp}) {
node_id++;
}
node_id++;
fs::path node_path = sysfs_nodes_path / std::to_string(node_id);
std::stringstream error;
error << "Could not opendir `" << node_path.c_str() << "'";
EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, 1), error.str().c_str());
}
class TestRocprofilerSingleton {
public:
uintptr_t ref_address;
TestRocprofilerSingleton() {
rocprofiler::ROCProfiler_Singleton& rocprofiler =
rocprofiler::ROCProfiler_Singleton::GetInstance();
ref_address = reinterpret_cast<long int>(&rocprofiler);
}
};
void instantiateRocprofiler(uint64_t *target) {
TestRocprofilerSingleton test;
*target = test.ref_address;
}
//Add more threads here
TEST(WhenInvokingRocprofilerSingleton, RocprofilerSingletonInstanciation) {
std::vector<std::thread> threads;
uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS));
for(int i = 0; i < MAX_THREADS; i++) {
threads.emplace_back(instantiateRocprofiler, &refaddress[i]);
}
for (auto&& thread : threads) thread.join();
uint64_t ref_addr = refaddress[0];
for(int i = 1; i < MAX_THREADS; i++)
EXPECT_EQ(ref_addr, refaddress[i]) << "RocprofilerSingleton Instanciation failed";
free(refaddress);
}
+114
Просмотреть файл
@@ -0,0 +1,114 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <gtest/gtest.h>
#include "src/core/hardware/hsa_info.h"
#include "api/rocprofiler_singleton.h"
#include "src/core/hsa/hsa_support.h"
#include "tests-v2/HSAToolLibrary/HSATool.h"
// used for dl_addr to locate the running
// path for executable
int main(int argc, char** argv);
std::string metrics_path;
std::string running_path;
bool is_installed_path() {
std::string path;
char* real_path;
Dl_info dl_info;
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
path = dl_info.dli_fname;
real_path = realpath(path.c_str(), NULL);
if (real_path == nullptr) {
throw(std::string("Error! in extracting real path"));
}
path.clear(); // reset path
path.append(real_path);
if (path.find("/opt") != std::string::npos) {
return true;
}
}
return false;
}
static void init_test_path() {
metrics_path = "libexec/rocprofiler/counters/derived_counters.xml";
if (is_installed_path()) {
running_path = "share/rocprofiler/tests/runCoreUnitTests";
} else {
running_path = "tests-v2/unittests/core/runCoreUnitTests";
}
}
// This function returns the running path of executable
std::string GetRunningPath(std::string string_to_erase) {
std::string path;
const char* real_path;
Dl_info dl_info;
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
std::string to_erase = string_to_erase;
path = dl_info.dli_fname;
real_path = realpath(path.c_str(), NULL);
if (real_path == nullptr) {
throw(std::string("Error! in extracting real path"));
}
path.clear(); // reset path
path.append(real_path);
size_t pos = path.find(to_erase);
if (pos != std::string::npos) path.erase(pos, to_erase.length());
} else {
throw(std::string("Error! in extracting real path"));
}
return path;
}
int HSATool_onload_callback(void* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
rocprofiler::HSASupport_Singleton::GetInstance();
hsasupport_singleton.HSAInitialize(reinterpret_cast<HsaApiTable*>(table));
return true;
}
int main(int argc, char** argv) {
init_test_path();
SetHSACallback(HSATool_onload_callback);
metrics_path = "libexec/rocprofiler/counters/derived_counters.xml";
std::string app_path = GetRunningPath(running_path);
std::stringstream gfx_path;
gfx_path << app_path << metrics_path;
setenv("ROCPROFILER_METRICS_PATH", gfx_path.str().c_str(), true);
testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe";
hsa_init();
int status = RUN_ALL_TESTS();
hsa_shut_down();
return status;
}
+110
Просмотреть файл
@@ -0,0 +1,110 @@
# Setup unit testing env
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
enable_testing()
find_package(GTest REQUIRED)
find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED)
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
file(GLOB ROCPROFILER_TRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
file(GLOB ROCPROFILER_SRC_CLASS_FILES
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
# Compiling gtests
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
add_executable(
runUnitTests
${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp
${CORE_MEMORY_SRC_FILES}
${CORE_SESSION_SRC_FILES}
${CORE_FILTER_SRC_FILES}
${CORE_DEVICE_PROFILING_SRC_FILES}
${CORE_COUNTERS_SAMPLER_SRC_FILES}
${CORE_HW_SRC_FILES}
${CORE_UTILS_SRC_FILES}
${ROCPROFILER_SPM_SRC_FILES}
${ROCPROFILER_SRC_FILES}
${CORE_HSA_SRC_FILES}
${CORE_HSA_PACKETS_SRC_FILES}
${CORE_COUNTERS_SRC_FILES}
${CORE_HSA_QUEUES_SRC_FILES}
${ROCPROFILER_TRACER_SRC_FILES}
${ROCPROFILER_ROCTRACER_SRC_FILES}
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES}
${GTEST_MAIN_SRC_FILE}
)
target_include_directories(
runUnitTests
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
${PROJECT_BINARY_DIR}/rocprofiler)
target_compile_definitions(
runUnitTests
PUBLIC AMD_INTERNAL_BUILD
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_link_libraries(
runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl)
add_dependencies(tests runUnitTests)
install(TARGETS runUnitTests
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
COMPONENT tests)
add_test(AllTests runUnitTests)
@@ -30,27 +30,12 @@
#include "core/session/session.h"
#include "utils/helper.h"
/*
* ###############################################
* ################TESTING HSA_INFO###############
* ###############################################
*/
TEST(WhenTestingAgentInfoGetterSetters, TestRunsSuccessfully) {
Agent::AgentInfo agent_info = Agent::AgentInfo();
char gpu_name[] = "gfx10";
agent_info.setName(gpu_name);
agent_info.setIndex(0);
agent_info.setType(hsa_device_type_t::HSA_DEVICE_TYPE_GPU);
EXPECT_EQ(agent_info.getName(), gpu_name);
EXPECT_EQ(agent_info.getIndex(), 0);
EXPECT_EQ(agent_info.getType(), hsa_device_type_t::HSA_DEVICE_TYPE_GPU);
Agent::CounterHardwareInfo hw_info(0, "GRBM");
EXPECT_TRUE(getHardwareInfo(0, "GRBM", &hw_info));
}
void buffer_callback_fun(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end,
@@ -66,7 +51,7 @@ void buffer_callback_fun(const rocprofiler_record_header_t* begin,
// A lot have changed in the class, since this test was written
// Need to rewrite all the test cases again.
TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) {
TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) {
Memory::GenericBuffer* buffer = new Memory::GenericBuffer(
rocprofiler_session_id_t{0}, rocprofiler_buffer_id_t{0}, 0x8000, buffer_callback_fun);
@@ -285,7 +270,7 @@ void (*callback_fun)(const rocprofiler_record_header_t* begin,
TEST(WhenTestingCounterCollectionMode, TestSucceeds) {
rocprofiler_session_id_t session_id;
rocprofiler::ROCProfiler_Singleton toolobj;
rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance();
session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE);
rocprofiler_filter_id_t filter_id =
toolobj.GetSession(session_id)
@@ -304,7 +289,7 @@ TEST(WhenTestingCounterCollectionMode, TestSucceeds) {
TEST(WhenTestingTimeStampCollectionMode, TestSucceeds) {
rocprofiler_session_id_t session_id;
rocprofiler::ROCProfiler_Singleton toolobj;
rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance();
session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE);
rocprofiler_filter_id_t filter_id =
toolobj.GetSession(session_id)