SWDEV-399631: Converting into singleton class
Rocprofiler and HSAsupport classes have been implemented as
singletons which gets initialized lazily.
Change-Id: I98db4713c7282d88966aeb0ea9df83ba457b2ea3
[ROCm/rocprofiler commit: 4980409c5a]
Этот коммит содержится в:
@@ -72,7 +72,7 @@ if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
|
||||
if [ -z "$RUN_TEST" ] ; then RUN_TEST=no; fi
|
||||
if [ -z "$ASAN" ] ; then ASAN=False; fi
|
||||
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi
|
||||
|
||||
|
||||
|
||||
ROCPROFILER_ROOT=$(cd $ROCPROFILER_ROOT && echo $PWD)
|
||||
|
||||
@@ -96,6 +96,7 @@ cmake \
|
||||
-DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
|
||||
-DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
|
||||
-DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
|
||||
-DHIP_ROOT_DIR=${ROCM_PATH} \
|
||||
$ROCPROFILER_ROOT
|
||||
|
||||
popd
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <experimental/filesystem>
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
@@ -34,6 +35,7 @@
|
||||
#include "src/utils/logger.h"
|
||||
#include "src/core/memory/generic_buffer.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#define ASSERTM(exp, msg) assert(((void)msg, exp))
|
||||
|
||||
extern std::mutex sessions_pending_signal_lock;
|
||||
@@ -42,14 +44,37 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
ROCProfiler_Singleton* rocprofiler_singleton;
|
||||
|
||||
|
||||
// Constructor of ROCProfiler
|
||||
// Takes the buffer size, a buffer callback function and a buffer flush
|
||||
// interval to allocate a buffer pool using GenericStorage Also takes the
|
||||
// replay mode (application replay/kernel replay/user replay) to set the replay
|
||||
// mode for the rocprofiler class object
|
||||
ROCProfiler_Singleton::ROCProfiler_Singleton() : current_session_id_(rocprofiler_session_id_t{0}) {}
|
||||
ROCProfiler_Singleton::ROCProfiler_Singleton() {
|
||||
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
|
||||
if (!fs::exists(sysfs_nodes_path))
|
||||
rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
|
||||
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
|
||||
fs::path node_path = dirp_entry.path();
|
||||
long long node_id = std::stoll(dirp_entry.path().stem().string());
|
||||
fs::path gpu_path = node_path / "gpu_id";
|
||||
std::ifstream gpu_id_file(gpu_path.c_str());
|
||||
std::string gpu_id_str;
|
||||
if (gpu_id_file.is_open()) {
|
||||
gpu_id_file >> gpu_id_str;
|
||||
if (!gpu_id_str.empty()) {
|
||||
long long gpu_id = std::stoll(gpu_id_str);
|
||||
if (gpu_id > 0) {
|
||||
Agent::DeviceInfo deviceInfo(node_id, gpu_id);
|
||||
// Since it is in static initializer, so its protected
|
||||
agent_device_map_.emplace(deviceInfo.getGPUId(), deviceInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor of rocprofiler_singleton
|
||||
// deletes the buffer pool
|
||||
@@ -68,7 +93,11 @@ ROCProfiler_Singleton::~ROCProfiler_Singleton() {
|
||||
}
|
||||
Counter::ClearBasicCounters();
|
||||
}
|
||||
|
||||
ROCProfiler_Singleton& ROCProfiler_Singleton::GetInstance() {
|
||||
static ROCProfiler_Singleton* rocprofiler_singleton_instance =
|
||||
new ROCProfiler_Singleton;
|
||||
return *rocprofiler_singleton_instance;
|
||||
}
|
||||
bool ROCProfiler_Singleton::FindAgent(rocprofiler_agent_id_t agent_id) { return true; }
|
||||
size_t ROCProfiler_Singleton::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind,
|
||||
rocprofiler_agent_id_t agent_id) {
|
||||
@@ -210,28 +239,45 @@ const char* ROCProfiler_Singleton::GetKernelInfo(rocprofiler_kernel_info_kind_t
|
||||
}
|
||||
}
|
||||
|
||||
const Agent::DeviceInfo& ROCProfiler_Singleton::GetDeviceInfo(uint64_t gpu_id) {
|
||||
std::lock_guard<std::mutex> info_map_lock(agent_device_map_mutex_);
|
||||
auto it = agent_device_map_.find(gpu_id);
|
||||
if (it == agent_device_map_.end())
|
||||
rocprofiler::fatal("Device Info is not found for the given id:%ld", gpu_id);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
// TODO(aelwazir): To be implemented
|
||||
bool ROCProfiler_Singleton::CheckFilterData(rocprofiler_filter_kind_t filter_kind,
|
||||
rocprofiler_filter_data_t filter_data) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// End of ROCProfiler_Singleton Class
|
||||
rocprofiler_timestamp_t ROCProfiler_Singleton::timestamp_ns() {
|
||||
|
||||
ROCProfiler_Singleton* GetROCProfilerSingleton() { return rocprofiler_singleton; }
|
||||
static uint64_t sys_clock_period = 0;
|
||||
struct timespec ts;
|
||||
// We are not full on memory model. At worst each CPU can call it once.
|
||||
// We don't care for this variable's value in global memory as long as we have
|
||||
// non-zero in the CPU cache.
|
||||
if (sys_clock_period == 0) {
|
||||
clock_getres(CLOCK_BOOTTIME, &ts);
|
||||
sys_clock_period = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec));
|
||||
}
|
||||
|
||||
void InitROCProfilerSingleton() { rocprofiler_singleton = new ROCProfiler_Singleton; }
|
||||
void ResetROCProfilerSingleton() {
|
||||
delete rocprofiler_singleton;
|
||||
// TODO(aelwazir): We need to use std::optional or std::unique_ptr
|
||||
// if (rocprofiler_singleton) rocprofiler_singleton.reset();
|
||||
clock_gettime(CLOCK_BOOTTIME, &ts);
|
||||
uint64_t time = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec));
|
||||
if (sys_clock_period != 1)
|
||||
return rocprofiler_timestamp_t{time / sys_clock_period};
|
||||
else
|
||||
return rocprofiler_timestamp_t{time};
|
||||
}
|
||||
|
||||
rocprofiler_timestamp_t GetCurrentTimestamp() { return hsa_support::GetCurrentTimestampNS(); }
|
||||
|
||||
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) {
|
||||
if (hsa_support::IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS;
|
||||
if (hsa_support_IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS;
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// End of ROCProfiler_Singleton Class
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -41,13 +41,15 @@
|
||||
|
||||
#include "src/core/session/session.h"
|
||||
#include "src/core/session/device_profiling.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class ROCProfiler_Singleton {
|
||||
public:
|
||||
ROCProfiler_Singleton();
|
||||
~ROCProfiler_Singleton();
|
||||
ROCProfiler_Singleton(const ROCProfiler_Singleton&) = delete;
|
||||
ROCProfiler_Singleton& operator=(const ROCProfiler_Singleton&) = delete;
|
||||
static ROCProfiler_Singleton& GetInstance();
|
||||
|
||||
bool FindAgent(rocprofiler_agent_id_t agent_id);
|
||||
size_t GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
|
||||
@@ -84,7 +86,8 @@ class ROCProfiler_Singleton {
|
||||
rocprofiler_filter_data_t filter_data);
|
||||
uint64_t GetUniqueRecordId();
|
||||
uint64_t GetUniqueKernelDispatchId();
|
||||
|
||||
const Agent::DeviceInfo& GetDeviceInfo(uint64_t gpu_id);
|
||||
rocprofiler_timestamp_t timestamp_ns();
|
||||
private:
|
||||
rocprofiler_session_id_t current_session_id_{0};
|
||||
std::mutex session_map_lock_;
|
||||
@@ -92,6 +95,11 @@ class ROCProfiler_Singleton {
|
||||
std::atomic<uint64_t> records_counter_{1};
|
||||
std::mutex device_profiling_session_map_lock_;
|
||||
std::map<uint64_t, DeviceProfileSession*> dev_profiling_sessions_;
|
||||
std::mutex agent_device_map_mutex_;
|
||||
std::unordered_map<uint64_t, Agent::DeviceInfo> agent_device_map_;
|
||||
ROCProfiler_Singleton();
|
||||
~ROCProfiler_Singleton();
|
||||
|
||||
/*
|
||||
* XXX: Associating PC samples with a running kernel requires an identifier
|
||||
* that will be unique across all kernel executions. It is not enough to use
|
||||
@@ -110,14 +118,7 @@ class ROCProfiler_Singleton {
|
||||
std::atomic<uint64_t> kernel_dispatch_counter_{1};
|
||||
};
|
||||
|
||||
void InitROCProfilerSingleton();
|
||||
void ResetROCProfilerSingleton();
|
||||
ROCProfiler_Singleton* GetROCProfilerSingleton();
|
||||
|
||||
rocprofiler_timestamp_t GetCurrentTimestamp();
|
||||
|
||||
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_TOOLS_ROCPROFILER_SINGLETON_H_
|
||||
|
||||
@@ -132,7 +132,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
|
||||
API_METHOD_PREFIX
|
||||
if (api_started.load(std::memory_order_relaxed))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED);
|
||||
rocprofiler::InitROCProfilerSingleton();
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
api_started.exchange(true, std::memory_order_release);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -140,14 +140,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
|
||||
// Finalize the API
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() {
|
||||
API_INIT_CHECKER
|
||||
rocprofiler::ResetROCProfilerSingleton();
|
||||
api_started.exchange(false, std::memory_order_release);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) {
|
||||
API_INIT_CHECKER
|
||||
*timestamp = rocprofiler::GetCurrentTimestamp();
|
||||
*timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns();
|
||||
if (timestamp->value <= 0)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE);
|
||||
API_METHOD_SUFFIX
|
||||
@@ -163,20 +162,21 @@ rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info_size(
|
||||
rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, size_t* data_size) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindAgent(agent_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
|
||||
*data_size = rocprofiler::GetROCProfilerSingleton()->GetAgentInfoSize(kind, agent_id);
|
||||
if (*data_size <= 0)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
|
||||
*data_size = rocprofiler_singleton.GetAgentInfoSize(kind, agent_id);
|
||||
if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
|
||||
rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, const char** data) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindAgent(agent_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
|
||||
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetAgentInfo(kind, agent_id)))
|
||||
if (!(*data = rocprofiler_singleton.GetAgentInfo(kind, agent_id)))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -184,20 +184,21 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info_size(
|
||||
rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, size_t* data_size) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindQueue(queue_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
|
||||
*data_size = rocprofiler::GetROCProfilerSingleton()->GetQueueInfoSize(kind, queue_id);
|
||||
if (*data_size <= 0)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
|
||||
*data_size = rocprofiler_singleton.GetQueueInfoSize(kind, queue_id);
|
||||
if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
|
||||
rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, const char** data) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindQueue(queue_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
|
||||
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetQueueInfo(kind, queue_id)))
|
||||
if (!(*data = rocprofiler_singleton.GetQueueInfo(kind, queue_id)))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -205,9 +206,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
|
||||
rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, size_t* data_size) {
|
||||
API_INIT_CHECKER
|
||||
// if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id))
|
||||
// if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id))
|
||||
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
|
||||
*data_size = rocprofiler::GetROCProfilerSingleton()->GetKernelInfoSize(kind, kernel_id);
|
||||
*data_size = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfoSize(kind, kernel_id);
|
||||
if (*data_size <= 0)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
@@ -216,9 +217,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info(
|
||||
rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, const char** data) {
|
||||
API_INIT_CHECKER
|
||||
// if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id))
|
||||
// if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id))
|
||||
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
|
||||
if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetKernelInfo(kind, kernel_id)))
|
||||
if (!(*data = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfo(kind, kernel_id)))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -227,14 +228,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info_size(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
|
||||
rocprofiler_counter_id_t counter_id, size_t* data_size) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetProfiler()
|
||||
->FindCounter(counter_id))
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
|
||||
*data_size = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetProfiler()
|
||||
*data_size = rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->GetProfiler()
|
||||
->GetCounterInfoSize(kind, counter_id);
|
||||
if (*data_size <= 0)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
|
||||
@@ -245,13 +244,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
|
||||
rocprofiler_counter_id_t counter_id, const char** data) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetProfiler()
|
||||
->FindCounter(counter_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
|
||||
if (!(*data = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
if (!(*data = rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->GetProfiler()
|
||||
->GetCounterInfo(kind, counter_id)))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
|
||||
@@ -280,14 +277,12 @@ rocprofiler_query_tracer_operation_id(rocprofiler_tracer_activity_domain_t domai
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_flush_data(rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetBuffer(buffer_id)
|
||||
->Flush())
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->GetBuffer(buffer_id)->Flush())
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -298,9 +293,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
|
||||
const rocprofiler_record_header_t* record, const rocprofiler_record_header_t** next,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
|
||||
if (!Memory::GetNextRecord(record, next))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED);
|
||||
@@ -311,7 +307,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_session(
|
||||
rocprofiler_replay_mode_t replay_mode, rocprofiler_session_id_t* session_id) {
|
||||
API_INIT_CHECKER
|
||||
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode);
|
||||
*session_id = rocprofiler::ROCProfiler_Singleton::GetInstance().CreateSession(replay_mode);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -322,15 +318,16 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
|
||||
API_INIT_CHECKER
|
||||
// TODO(aelwazir): CheckFilterData to be implemented
|
||||
// int error_code =
|
||||
// rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind,
|
||||
// rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind,
|
||||
// filter_data);
|
||||
// if (error_code == -1) throw
|
||||
// rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0)
|
||||
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
*filter_id = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
*filter_id = rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->CreateFilter(filter_kind, filter_data, data_count, property);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -338,11 +335,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_filter(rocprofiler_session_id_t session_id,
|
||||
rocprofiler_filter_id_t filter_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyFilter(filter_id);
|
||||
rocprofiler_singleton.GetSession(session_id)->DestroyFilter(filter_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -350,10 +348,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_buffer(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_callback_t buffer_callback,
|
||||
size_t buffer_size, rocprofiler_buffer_id_t* buffer_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
*buffer_id = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
*buffer_id = rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->CreateBuffer(buffer_callback, buffer_size);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -362,12 +361,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_buffer_property_t* buffer_properties, uint32_t buffer_properties_count) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->GetBuffer(buffer_id)
|
||||
->SetProperties(buffer_properties, buffer_properties_count);
|
||||
API_METHOD_SUFFIX
|
||||
@@ -376,11 +376,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_buffer(rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyBuffer(buffer_id);
|
||||
rocprofiler_singleton.GetSession(session_id)->DestroyBuffer(buffer_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -388,20 +389,18 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_filter_buffer(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
if (!rocprofiler_singleton
|
||||
.GetSession(session_id)
|
||||
->CheckFilterBufferSize(filter_id, buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetFilter(filter_id)
|
||||
->SetBufferId(buffer_id);
|
||||
rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetBufferId(buffer_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -409,19 +408,15 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_api_trace_sync_callback(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
|
||||
rocprofiler_sync_callback_t callback) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id))
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
|
||||
if (rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetFilter(filter_id)
|
||||
->GetKind() != ROCPROFILER_API_TRACE)
|
||||
if (rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->GetKind() !=
|
||||
ROCPROFILER_API_TRACE)
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED);
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetFilter(filter_id)
|
||||
->SetCallback(callback);
|
||||
rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetCallback(callback);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -433,30 +428,31 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
|
||||
API_INIT_CHECKER
|
||||
// TODO(aelwazir): CheckFilterData to be implemented
|
||||
// int error_code =
|
||||
// rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind,
|
||||
// rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind,
|
||||
// filter_data);
|
||||
// if (error_code == -1) throw
|
||||
// rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0)
|
||||
// throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
|
||||
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode);
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
*session_id = rocprofiler_singleton.CreateSession(replay_mode);
|
||||
rocprofiler_filter_id_t filter_id =
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(*session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession(*session_id)
|
||||
->CreateFilter(filter_kind, filter_data, data_count, property);
|
||||
rocprofiler_buffer_id_t buffer_id = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(*session_id)
|
||||
->CreateBuffer(buffer_callback, buffer_size);
|
||||
rocprofiler_buffer_id_t buffer_id = rocprofiler_singleton
|
||||
.GetSession(*session_id)
|
||||
->CreateBuffer(buffer_callback, buffer_size);
|
||||
if (filter_kind == ROCPROFILER_API_TRACE)
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(*session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession(*session_id)
|
||||
->GetFilter(filter_id)
|
||||
->SetCallback(callback);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(*session_id)
|
||||
if (!rocprofiler_singleton
|
||||
.GetSession(*session_id)
|
||||
->CheckFilterBufferSize(filter_id, buffer_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(*session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession(*session_id)
|
||||
->GetFilter(filter_id)
|
||||
->SetBufferId(buffer_id);
|
||||
API_METHOD_SUFFIX
|
||||
@@ -466,9 +462,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
rocprofiler::GetROCProfilerSingleton()->DestroySession(session_id);
|
||||
rocprofiler_singleton.DestroySession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -476,16 +473,17 @@ rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_start_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasFilter())
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->HasFilter())
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasBuffer())
|
||||
if (!rocprofiler_singleton.GetSession(session_id)->HasBuffer())
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER);
|
||||
if (rocprofiler::GetROCProfilerSingleton()->HasActiveSession())
|
||||
if (rocprofiler_singleton.HasActiveSession())
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION);
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Start();
|
||||
rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(session_id);
|
||||
rocprofiler_singleton.GetSession(session_id)->Start();
|
||||
rocprofiler_singleton.SetCurrentActiveSession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -493,12 +491,13 @@ rocprofiler_start_session(rocprofiler_session_id_t session_id) {
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_terminate_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id))
|
||||
auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (!rocprofiler_singleton.FindSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocprofiler::GetROCProfilerSingleton()->IsActiveSession(session_id))
|
||||
if (!rocprofiler_singleton.IsActiveSession(session_id))
|
||||
throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE);
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Terminate();
|
||||
rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(rocprofiler_session_id_t{0});
|
||||
rocprofiler_singleton.GetSession(session_id)->Terminate();
|
||||
rocprofiler_singleton.SetCurrentActiveSession(rocprofiler_session_id_t{0});
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -507,8 +506,8 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create
|
||||
int cpu_index, int gpu_index) {
|
||||
API_METHOD_PREFIX
|
||||
std::vector<std::string> counters(counter_names, counter_names + num_counters);
|
||||
*session_id = rocprofiler::GetROCProfilerSingleton()->CreateDeviceProfilingSession(
|
||||
counters, cpu_index, gpu_index);
|
||||
*session_id =
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().CreateDeviceProfilingSession(counters, cpu_index, gpu_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -516,7 +515,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StartSession();
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StartSession();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -524,7 +523,7 @@ rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id)
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_device_profile_metric_t* data) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->PollMetrics(data);
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->PollMetrics(data);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -532,7 +531,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StopSession();
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StopSession();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -540,7 +539,7 @@ rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::GetROCProfilerSingleton()->DestroyDeviceProfilingSession(session_id);
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance().DestroyDeviceProfilingSession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -563,7 +562,7 @@ rocprofiler_codeobj_capture_create(
|
||||
uint64_t userdata
|
||||
) {
|
||||
API_METHOD_PREFIX
|
||||
id->handle = rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId();
|
||||
id->handle = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
|
||||
codeobj_record::make_capture(*id, mode, userdata);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
@@ -618,7 +617,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
if (started) rocprofiler::fatal("HSA Tool started already!");
|
||||
started = true;
|
||||
rocprofiler::hsa_support::Initialize(table);
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().HSAInitialize(table);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -627,7 +626,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
*/
|
||||
ROCPROFILER_EXPORT void OnUnload() {
|
||||
if (!started) rocprofiler::fatal("HSA Tool hasn't started yet!");
|
||||
rocprofiler::hsa_support::Finalize();
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().HSAFinalize();
|
||||
started = false;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "eval_metrics.h"
|
||||
#include "src/utils/helper.h"
|
||||
#include "src/core/hsa/hsa_common.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include <set>
|
||||
#include <math.h>
|
||||
|
||||
@@ -93,16 +93,20 @@ bool metrics::ExtractMetricEvents(
|
||||
results_list holds the result objects for each event (which means, basic counters only)
|
||||
*/
|
||||
try {
|
||||
uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount();
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
uint32_t xcc_count = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount();
|
||||
|
||||
|
||||
for (size_t i = 0; i < metric_names.size(); i++) {
|
||||
counters_vec_t counters_vec;
|
||||
// TODO: saurabh
|
||||
// const Metric* metric = metrics_dict->GetMetricByName(metric_names[i]);
|
||||
const Metric* metric = metrics_dict->Get(metric_names[i]);
|
||||
if (metric == nullptr) {
|
||||
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
fatal("input metric '%s' not supported on this hardware: %s ", metric_names[i].c_str(),
|
||||
agentInfo.getName().data());
|
||||
HSAAgentInfo& agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle);
|
||||
fatal("input metric'%s' not supported on this hardware: %s ", metric_names[i].c_str(),
|
||||
agentInfo.GetDeviceInfo().getName().data());
|
||||
|
||||
}
|
||||
|
||||
// adding result object for derived metric
|
||||
@@ -176,7 +180,7 @@ bool metrics::ExtractMetricEvents(
|
||||
|
||||
bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent,
|
||||
std::vector<results_t*>& results_list) {
|
||||
uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount();
|
||||
uint32_t xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount();
|
||||
uint32_t single_xcc_buff_size = profile->output_buffer.size / (sizeof(uint64_t) * xcc_count);
|
||||
callback_data_t callback_data{&results_list, 0, single_xcc_buff_size};
|
||||
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data);
|
||||
|
||||
@@ -44,6 +44,8 @@ THE SOFTWARE.
|
||||
#include <mutex>
|
||||
#include <unordered_set>
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
namespace rocprofiler {
|
||||
@@ -121,10 +123,10 @@ class MetricsDict {
|
||||
const cache_t* const cache_;
|
||||
};
|
||||
|
||||
static MetricsDict* Create(const Agent::AgentInfo* agent_info) {
|
||||
static MetricsDict* Create(const rocprofiler::HSAAgentInfo* agent_info) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (map_ == NULL) map_ = new map_t;
|
||||
std::string name = agent_info->getGfxip();
|
||||
std::string name = agent_info->GetDeviceInfo().getGfxip();
|
||||
auto ret = map_->insert({name, NULL});
|
||||
if (ret.second) ret.first->second = new MetricsDict(agent_info);
|
||||
return ret.first->second;
|
||||
@@ -195,7 +197,7 @@ class MetricsDict {
|
||||
return (xml_ != NULL) ? xml_->GetNodes("top." + scope + ".metric") : xml::Xml::nodes_t();
|
||||
}
|
||||
|
||||
MetricsDict(const Agent::AgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) {
|
||||
MetricsDict(const rocprofiler::HSAAgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) {
|
||||
std::string xml_name = []() {
|
||||
if (const char* path = getenv("ROCPROFILER_METRICS_PATH"); path != nullptr) return path;
|
||||
return "";
|
||||
@@ -208,14 +210,14 @@ class MetricsDict {
|
||||
}
|
||||
xml_ = xml::Xml::Create(xml_name);
|
||||
if (xml_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metrics .xml open error '" << xml_name << "'");
|
||||
xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->getMaxQueueSize());
|
||||
xml_->AddConst("top.const.metric", "CU_NUM", agent_info->getCUCount());
|
||||
xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->GetDeviceInfo().getMaxQueueSize());
|
||||
xml_->AddConst("top.const.metric", "CU_NUM", agent_info->GetDeviceInfo().getCUCount());
|
||||
xml_->AddConst("top.const.metric", "SIMD_NUM",
|
||||
agent_info->getSimdCountPerCU() * agent_info->getCUCount());
|
||||
xml_->AddConst("top.const.metric", "SE_NUM", agent_info->getShaderEngineCount());
|
||||
agent_info->GetDeviceInfo().getSimdCountPerCU() * agent_info->GetDeviceInfo().getCUCount());
|
||||
xml_->AddConst("top.const.metric", "SE_NUM", agent_info->GetDeviceInfo().getShaderEngineCount());
|
||||
xml_->AddConst("top.const.metric", "LDS_BANKS", 32);
|
||||
ImportMetrics(agent_info, "const");
|
||||
agent_name_ = agent_info->getName();
|
||||
agent_name_ = agent_info->GetDeviceInfo().getName();
|
||||
|
||||
if (agent_name_.find(':') != std::string::npos) // Remove compiler flags from the agent_name
|
||||
agent_name_ = agent_name_.substr(0, agent_name_.find(':'));
|
||||
@@ -233,7 +235,7 @@ class MetricsDict {
|
||||
if (supported_agent_names.find(agent_name_) != supported_agent_names.end()) {
|
||||
ImportMetrics(agent_info, agent_name_);
|
||||
} else {
|
||||
agent_name_ = agent_info->getGfxip();
|
||||
agent_name_ = agent_info->GetDeviceInfo().getGfxip();
|
||||
ImportMetrics(agent_info, agent_name_);
|
||||
}
|
||||
ImportMetrics(agent_info, "global");
|
||||
@@ -244,7 +246,7 @@ class MetricsDict {
|
||||
for (auto& entry : cache_) delete entry.second;
|
||||
}
|
||||
|
||||
static hsa_ven_amd_aqlprofile_id_query_t Translate(const Agent::AgentInfo* agent_info,
|
||||
static hsa_ven_amd_aqlprofile_id_query_t Translate(const rocprofiler::HSAAgentInfo* agent_info,
|
||||
const std::string& block_name) {
|
||||
hsa_ven_amd_aqlprofile_profile_t profile{};
|
||||
profile.agent = hsa_agent_t{agent_info->getHandle()};
|
||||
@@ -256,7 +258,7 @@ class MetricsDict {
|
||||
return query;
|
||||
}
|
||||
|
||||
void ImportMetrics(const Agent::AgentInfo* agent_info, const std::string& scope) {
|
||||
void ImportMetrics(const rocprofiler::HSAAgentInfo* agent_info, const std::string& scope) {
|
||||
auto arr = xml_->GetNodes("top." + scope + ".metric");
|
||||
xml::Xml::node_list_t metrics_list(arr.begin(), arr.end());
|
||||
uint32_t metrics_number = metrics_list.size();
|
||||
@@ -380,7 +382,7 @@ class MetricsDict {
|
||||
}
|
||||
|
||||
xml::Xml* xml_;
|
||||
const Agent::AgentInfo* agent_info_;
|
||||
const rocprofiler::HSAAgentInfo* agent_info_;
|
||||
std::string agent_name_;
|
||||
cache_t cache_;
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "df_counters_mi200.h"
|
||||
#include "df_perfmon_registers_mi200.h"
|
||||
#include "mmio.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
@@ -15,7 +16,7 @@ namespace rocprofiler {
|
||||
/* get ficaa value for accessing CakeDlwmActiveTransferCount */
|
||||
#define AMDGPU_PMU_SET_FICAA(o) ((o << 16) | 0x1AF5)
|
||||
|
||||
DFPerfMonMI200::DFPerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
|
||||
DFPerfMonMI200::DFPerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) {
|
||||
mmio_ = dynamic_cast<mmio::DFPerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::DF_PERFMON, info));
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace rocprofiler {
|
||||
|
||||
class DFPerfMonMI200 : public PerfMon {
|
||||
public:
|
||||
DFPerfMonMI200(const Agent::AgentInfo& info);
|
||||
DFPerfMonMI200(const HSAAgentInfo& info);
|
||||
~DFPerfMonMI200();
|
||||
void Start() override;
|
||||
void Stop(){};
|
||||
|
||||
@@ -51,10 +51,10 @@ void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char*
|
||||
#endif
|
||||
}
|
||||
|
||||
MMIO::MMIO(const Agent::AgentInfo& info)
|
||||
MMIO::MMIO(const HSAAgentInfo& info)
|
||||
: agent_info_(&info), pci_memory_(nullptr), type_(DEFAULT_MMAP) {
|
||||
const auto pci_domain = agent_info_->getPCIDomain();
|
||||
const auto pci_location_id = agent_info_->getPCILocationID();
|
||||
const auto pci_domain = agent_info_->GetDeviceInfo().getPCIDomain();
|
||||
const auto pci_location_id = agent_info_->GetDeviceInfo().getPCILocationID();
|
||||
|
||||
pci_device_ =
|
||||
pci_device_find_by_slot(pci_domain, pci_location_id >> 8, pci_location_id & 0xFF, 0);
|
||||
@@ -123,7 +123,7 @@ bool MMIO::RegisterReadAPI(uint32_t reg_offset, uint32_t& value) {
|
||||
}
|
||||
|
||||
|
||||
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
|
||||
MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const HSAAgentInfo& info) {
|
||||
MMIO* mmio = nullptr;
|
||||
switch (type) {
|
||||
case PCIE_PERFMON: {
|
||||
@@ -152,7 +152,7 @@ MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) {
|
||||
return mmio;
|
||||
}
|
||||
|
||||
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info) {
|
||||
MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info) {
|
||||
MMIO* mmio = nullptr;
|
||||
auto it = mmio_instances_.find(info.getHandle());
|
||||
if (it != mmio_instances_.end()) {
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
|
||||
#include <pciaccess.h>
|
||||
#include <mutex>
|
||||
@@ -67,17 +68,17 @@ class MMIO {
|
||||
virtual ~MMIO();
|
||||
friend class MMIOManager;
|
||||
|
||||
const Agent::AgentInfo& GetAgentInfo() { return *agent_info_; }
|
||||
const HSAAgentInfo& GetAgentInfo() { return *agent_info_; }
|
||||
mmap_type_t Type() { return type_; }
|
||||
|
||||
protected:
|
||||
MMIO(const Agent::AgentInfo& info);
|
||||
MMIO(const HSAAgentInfo& info);
|
||||
|
||||
// default constructor; helpful for derived classes
|
||||
// which want to setup mmio construction differently
|
||||
MMIO() { type_ = DEFAULT_MMAP; };
|
||||
|
||||
const Agent::AgentInfo* agent_info_;
|
||||
const HSAAgentInfo* agent_info_;
|
||||
struct pci_device* pci_device_;
|
||||
size_t pci_memory_size_;
|
||||
uint32_t* pci_memory_;
|
||||
@@ -94,7 +95,7 @@ class PciePerfmonMMIO : public MMIO {
|
||||
friend class MMIOManager;
|
||||
|
||||
protected:
|
||||
PciePerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; };
|
||||
PciePerfmonMMIO(const HSAAgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; };
|
||||
};
|
||||
|
||||
// DFPerfmonMMIO has same mmio setup approach as
|
||||
@@ -104,7 +105,7 @@ class DFPerfmonMMIO : public MMIO {
|
||||
friend class MMIOManager;
|
||||
|
||||
protected:
|
||||
DFPerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; };
|
||||
DFPerfmonMMIO( const HSAAgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; };
|
||||
};
|
||||
/*
|
||||
Class to manage mmio for UMC/DF/PCIe etc.
|
||||
@@ -114,8 +115,8 @@ class DFPerfmonMMIO : public MMIO {
|
||||
*/
|
||||
class MMIOManager {
|
||||
public:
|
||||
static MMIO* CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info);
|
||||
static MMIO* GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info);
|
||||
static MMIO* CreateMMIO(mmap_type_t type, const HSAAgentInfo& info);
|
||||
static MMIO* GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info);
|
||||
static void DestroyMMIOInstance(MMIO* instance);
|
||||
|
||||
private:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
PciePerfMonMI200::PciePerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) {
|
||||
PciePerfMonMI200::PciePerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) {
|
||||
mmio_ =
|
||||
dynamic_cast<mmio::PciePerfmonMMIO*>(mmio::MMIOManager::CreateMMIO(mmio::PCIE_PERFMON, info));
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ namespace rocprofiler {
|
||||
|
||||
class PciePerfMonMI200 : public PerfMon {
|
||||
public:
|
||||
PciePerfMonMI200(const Agent::AgentInfo& info);
|
||||
PciePerfMonMI200(const HSAAgentInfo& info);
|
||||
~PciePerfMonMI200();
|
||||
void SetCounterNames(std::vector<std::string>& counter_names) override;
|
||||
void Start() override;
|
||||
|
||||
@@ -19,9 +19,13 @@
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hsa_info.h"
|
||||
#include <fstream>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
@@ -35,86 +39,120 @@
|
||||
namespace Agent {
|
||||
// AgentInfo Class
|
||||
|
||||
AgentInfo::AgentInfo() {}
|
||||
AgentInfo::AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table) : handle_(agent.handle) {
|
||||
if (table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS)
|
||||
rocprofiler::fatal("hsa_agent_get_info failed");
|
||||
char convert(uint32_t version) {
|
||||
uint32_t diff = version - 10;
|
||||
if (static_cast<char>('a' + diff) >= 'a' && static_cast<char>('a' + diff) <= 'z')
|
||||
return static_cast<char>('a' + diff);
|
||||
rocprofiler::fatal("Incorrect gpu version");
|
||||
}
|
||||
|
||||
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_);
|
||||
DeviceInfo::DeviceInfo(uint32_t topology_id, uint32_t gpu_id) {
|
||||
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes/";
|
||||
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
|
||||
if (!fs::exists(sysfs_nodes_path))
|
||||
rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
|
||||
// Check the type of the device using gpu id
|
||||
if (gpu_id == 0) assert("DeviceInfo does not support CPU");
|
||||
numa_node_ = topology_id;
|
||||
|
||||
fs::path node_path = sysfs_nodes_path / std::to_string(topology_id);
|
||||
if (!fs::exists(node_path)) rocprofiler::fatal("Could not opendir `%s'", node_path.c_str());
|
||||
xcc_num_ = 1;
|
||||
gpu_id_ = gpu_id;
|
||||
fs::path properties_path = node_path / "properties";
|
||||
std::ifstream props_ifs(properties_path);
|
||||
uint32_t cu_per_simd_array = 0, array_count = 0;
|
||||
uint32_t max_waves_per_simd = 0, gfx_target_version = 0;
|
||||
std::string prop_name, minor_version_str, stepping_str;
|
||||
uint64_t prop_value;
|
||||
uint32_t major_version = 0, minor_version = 0, stepping = 0;
|
||||
std::stringstream hex_minor_version;
|
||||
max_wave_size_ = 0;
|
||||
simds_per_cu_ = 0;
|
||||
shader_arrays_per_se_ = 0;
|
||||
se_num_ = 0;
|
||||
waves_per_cu_ = 0;
|
||||
cu_num_ = 0;
|
||||
compute_units_per_sh_ = 0;
|
||||
if (!props_ifs.is_open())
|
||||
rocprofiler::fatal("Could not open %s/properties", properties_path.c_str());
|
||||
while (props_ifs >> prop_name >> prop_value) {
|
||||
if (prop_name == "wave_front_size") {
|
||||
max_wave_size_ = static_cast<uint32_t>(prop_value);
|
||||
if (max_wave_size_ <= 0) rocprofiler::fatal("Invalid max_wave_size_ in the topology file");
|
||||
} else if (prop_name == "cu_per_simd_array") {
|
||||
cu_per_simd_array = static_cast<uint32_t>(prop_value);
|
||||
if (cu_per_simd_array <= 0)
|
||||
rocprofiler::fatal("Invalid cu_per_simd_array in the topology file");
|
||||
} else if (prop_name == "array_count") {
|
||||
array_count = static_cast<uint32_t>(prop_value);
|
||||
if (array_count <= 0) rocprofiler::fatal("Invalid array_count in the topology file");
|
||||
} else if (prop_name == "simd_per_cu") {
|
||||
simds_per_cu_ = static_cast<uint32_t>(prop_value);
|
||||
if (simds_per_cu_ <= 0) rocprofiler::fatal("Invalid simd_per_cu in the topology file");
|
||||
} else if (prop_name == "location_id")
|
||||
pci_location_id_ = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "domain")
|
||||
pci_domain_ = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "simd_arrays_per_engine") {
|
||||
shader_arrays_per_se_ = static_cast<uint32_t>(prop_value);
|
||||
if (shader_arrays_per_se_ <= 0)
|
||||
rocprofiler::fatal("Invalid simd_arrays_per_engine in the topology file");
|
||||
} else if (prop_name == "max_waves_per_simd") {
|
||||
max_waves_per_simd = static_cast<uint32_t>(prop_value);
|
||||
if (max_waves_per_simd <= 0)
|
||||
rocprofiler::fatal("Invalid max_waves_per_simd in the topology file");
|
||||
} else if (prop_name == "gfx_target_version")
|
||||
gfx_target_version = static_cast<uint32_t>(prop_value);
|
||||
|
||||
else if (prop_name == "unique_id")
|
||||
unique_gpu_id_ = static_cast<uint64_t>(prop_value);
|
||||
else if (prop_name == "num_xcc")
|
||||
xcc_num_ = static_cast<uint32_t>(prop_value);
|
||||
}
|
||||
|
||||
se_num_ = array_count / shader_arrays_per_se_;
|
||||
waves_per_cu_ = max_waves_per_simd * simds_per_cu_;
|
||||
cu_num_ = cu_per_simd_array * array_count;
|
||||
major_version = (gfx_target_version / 100) / 100;
|
||||
std::string major_version_str = std::to_string(major_version);
|
||||
minor_version = (gfx_target_version / 100) % 100;
|
||||
if (minor_version > 9)
|
||||
minor_version_str = std::string(1, convert(minor_version));
|
||||
else
|
||||
minor_version_str = std::to_string(minor_version);
|
||||
stepping = (gfx_target_version % 100);
|
||||
if (stepping > 9)
|
||||
stepping_str = std::string(1, convert(stepping));
|
||||
else
|
||||
stepping_str = std::to_string(stepping);
|
||||
std::string gpu_name = "gfx" + major_version_str + minor_version_str + stepping_str;
|
||||
strcpy(name_, gpu_name.c_str());
|
||||
compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_);
|
||||
wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;
|
||||
const int gfxip_label_len = std::min(strlen(name_) - 2, sizeof(gfxip_) - 1);
|
||||
memcpy(gfxip_, name_, gfxip_label_len);
|
||||
gfxip_[gfxip_label_len] = '\0';
|
||||
|
||||
if (type_ != HSA_DEVICE_TYPE_GPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_);
|
||||
table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_);
|
||||
|
||||
table->hsa_agent_get_info_fn(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_);
|
||||
|
||||
table->hsa_agent_get_info_fn(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_);
|
||||
|
||||
table->hsa_agent_get_info_fn(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_);
|
||||
|
||||
if (table->hsa_agent_get_info_fn(agent,
|
||||
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE,
|
||||
&shader_arrays_per_se_) != HSA_STATUS_SUCCESS ||
|
||||
table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
|
||||
&waves_per_cu_) != HSA_STATUS_SUCCESS) {
|
||||
rocprofiler::fatal("hsa_agent_get_info for gfxip hardware configuration failed");
|
||||
}
|
||||
|
||||
compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_);
|
||||
wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_;
|
||||
|
||||
if (table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_DOMAIN,
|
||||
&pci_domain_) != HSA_STATUS_SUCCESS ||
|
||||
table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID,
|
||||
&pci_location_id_) != HSA_STATUS_SUCCESS) {
|
||||
rocprofiler::fatal("hsa_agent_get_info for PCI info failed");
|
||||
}
|
||||
|
||||
// TODO(saurabh, giovanni): Remove this in 5.7
|
||||
if (table->hsa_agent_get_info_fn(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_XCC),
|
||||
&xcc_num_) != HSA_STATUS_SUCCESS) {
|
||||
xcc_num_ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t AgentInfo::getIndex() const { return index_; }
|
||||
hsa_device_type_t AgentInfo::getType() const { return type_; }
|
||||
uint64_t AgentInfo::getHandle() const { return handle_; }
|
||||
const std::string_view AgentInfo::getName() const { return name_; }
|
||||
std::string AgentInfo::getGfxip() const { return std::string(gfxip_); }
|
||||
uint32_t AgentInfo::getMaxWaveSize() const { return max_wave_size_; }
|
||||
uint32_t AgentInfo::getMaxQueueSize() const { return max_queue_size_; }
|
||||
uint32_t AgentInfo::getCUCount() const { return cu_num_; }
|
||||
uint32_t AgentInfo::getSimdCountPerCU() const { return simds_per_cu_; }
|
||||
uint32_t AgentInfo::getShaderEngineCount() const { return se_num_; }
|
||||
uint32_t AgentInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; }
|
||||
uint32_t AgentInfo::getMaxWavesPerCU() const { return waves_per_cu_; }
|
||||
uint32_t AgentInfo::getCUCountPerSH() const { return compute_units_per_sh_; }
|
||||
uint32_t AgentInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; }
|
||||
uint32_t AgentInfo::getPCIDomain() const { return pci_domain_; }
|
||||
uint32_t AgentInfo::getPCILocationID() const { return pci_location_id_; }
|
||||
uint32_t AgentInfo::getXccCount() const { return xcc_num_; }
|
||||
|
||||
void AgentInfo::setIndex(uint64_t index) { index_ = index; }
|
||||
void AgentInfo::setType(hsa_device_type_t type) { type_ = type; }
|
||||
void AgentInfo::setHandle(uint64_t handle) { handle_ = handle; }
|
||||
void AgentInfo::setName(const std::string& name) { strcpy(name_, name.c_str()); }
|
||||
|
||||
void AgentInfo::setNumaNode(uint32_t numa_node) { numa_node_ = numa_node; }
|
||||
uint32_t AgentInfo::getNumaNode() { return numa_node_; }
|
||||
|
||||
void AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent) { near_cpu_agent_ = near_cpu_agent; }
|
||||
hsa_agent_t AgentInfo::getNearCpuAgent() { return near_cpu_agent_; }
|
||||
std::string_view DeviceInfo::getName() const { return name_; }
|
||||
std::string DeviceInfo::getGfxip() const { return std::string(gfxip_); }
|
||||
uint32_t DeviceInfo::getMaxWaveSize() const { return max_wave_size_; }
|
||||
uint32_t DeviceInfo::getMaxQueueSize() const { return max_queue_size_; }
|
||||
uint32_t DeviceInfo::getCUCount() const { return cu_num_; }
|
||||
uint32_t DeviceInfo::getSimdCountPerCU() const { return simds_per_cu_; }
|
||||
uint32_t DeviceInfo::getShaderEngineCount() const { return se_num_; }
|
||||
uint32_t DeviceInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; }
|
||||
uint32_t DeviceInfo::getMaxWavesPerCU() const { return waves_per_cu_; }
|
||||
uint32_t DeviceInfo::getCUCountPerSH() const { return compute_units_per_sh_; }
|
||||
uint32_t DeviceInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; }
|
||||
uint32_t DeviceInfo::getPCIDomain() const { return pci_domain_; }
|
||||
uint32_t DeviceInfo::getPCILocationID() const { return pci_location_id_; }
|
||||
uint32_t DeviceInfo::getXccCount() const { return xcc_num_; }
|
||||
uint64_t DeviceInfo::getUniqueGPUId() const { return unique_gpu_id_; }
|
||||
uint32_t DeviceInfo::getNumaNode() const { return numa_node_; }
|
||||
uint64_t DeviceInfo::getGPUId() const { return gpu_id_; }
|
||||
|
||||
// CounterHardwareInfo Class
|
||||
|
||||
|
||||
@@ -36,18 +36,15 @@ namespace Agent {
|
||||
|
||||
static const uint32_t LDS_BLOCK_SIZE = 128 * 4;
|
||||
|
||||
// XXX TODO: This should be merged into rocprofiler::hsa_support::AgentInfo and
|
||||
// this file should be removed entirely, as it's completely redundant
|
||||
class AgentInfo {
|
||||
|
||||
//DeviceInfo supports only GPU
|
||||
class DeviceInfo {
|
||||
public:
|
||||
AgentInfo();
|
||||
AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table);
|
||||
|
||||
uint64_t getIndex() const;
|
||||
hsa_device_type_t getType() const;
|
||||
uint64_t getHandle() const;
|
||||
const std::string_view getName() const;
|
||||
DeviceInfo() = default;
|
||||
DeviceInfo(uint32_t topology_id, uint32_t gpu_id);
|
||||
|
||||
uint64_t getGPUId() const;
|
||||
std::string_view getName() const;
|
||||
std::string getGfxip() const;
|
||||
uint32_t getMaxWaveSize() const;
|
||||
uint32_t getMaxQueueSize() const;
|
||||
@@ -61,26 +58,11 @@ class AgentInfo {
|
||||
uint32_t getPCIDomain() const;
|
||||
uint32_t getPCILocationID() const;
|
||||
uint32_t getXccCount() const;
|
||||
|
||||
void setIndex(uint64_t index);
|
||||
void setType(hsa_device_type_t type);
|
||||
void setHandle(uint64_t handle);
|
||||
void setName(const std::string& name);
|
||||
|
||||
void setNumaNode(uint32_t numa_node);
|
||||
uint32_t getNumaNode();
|
||||
|
||||
void setNearCpuAgent(hsa_agent_t near_cpu_agent);
|
||||
hsa_agent_t getNearCpuAgent();
|
||||
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
uint64_t getUniqueGPUId() const;
|
||||
uint32_t getNumaNode() const;
|
||||
|
||||
private:
|
||||
uint64_t index_;
|
||||
hsa_device_type_t type_; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint64_t handle_;
|
||||
|
||||
char name_[64];
|
||||
char gfxip_[64];
|
||||
uint32_t max_wave_size_;
|
||||
@@ -95,12 +77,11 @@ class AgentInfo {
|
||||
uint32_t wave_slots_per_simd_;
|
||||
// Number of XCCs on the GPU
|
||||
uint32_t xcc_num_;
|
||||
|
||||
uint32_t pci_domain_;
|
||||
uint32_t pci_location_id_;
|
||||
|
||||
uint64_t unique_gpu_id_;
|
||||
uint32_t numa_node_;
|
||||
hsa_agent_t near_cpu_agent_;
|
||||
uint32_t gpu_id_;
|
||||
};
|
||||
|
||||
// XXX TODO: This should be moved somewhere else so this file can be deleted
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hsa_common.h"
|
||||
|
||||
#include "src/utils/exception.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
namespace hsa_support {
|
||||
|
||||
std::mutex agents_map_lock;
|
||||
std::map<decltype(hsa_agent_t::handle), Agent::AgentInfo> agent_info_map;
|
||||
Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle) {
|
||||
std::lock_guard<std::mutex> lock(agents_map_lock);
|
||||
if (agent_info_map.find(handle) != agent_info_map.end()) {
|
||||
return agent_info_map.at(handle);
|
||||
} else {
|
||||
std::cerr << std::string("Error: Can't find Agent with handle(") << std::to_string(handle)
|
||||
<< ") in this system" << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<hsa_agent_t> cpu_agents_list;
|
||||
|
||||
void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info) {
|
||||
std::lock_guard<std::mutex> lock(agents_map_lock);
|
||||
agent_info_map.emplace(handle, agent_info);
|
||||
if (agent_info.getType() == HSA_DEVICE_TYPE_GPU) {
|
||||
cpu_agents_list.emplace_back(hsa_agent_t{handle});
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<hsa_agent_t>& GetCPUAgentList() { return cpu_agents_list; }
|
||||
|
||||
hsa_agent_t GetAgentByIndex(uint64_t agent_index) {
|
||||
std::lock_guard<std::mutex> lock(agents_map_lock);
|
||||
for (auto& agent_info : agent_info_map) {
|
||||
if (agent_info.second.getIndex() == agent_index) {
|
||||
return hsa_agent_t{agent_info.second.getHandle()};
|
||||
}
|
||||
}
|
||||
std::cerr << std::string("Error: Can't find Agent with Index(") << std::to_string(agent_index)
|
||||
<< ") in this system" << std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
CoreApiTable saved_core_api{};
|
||||
CoreApiTable& GetCoreApiTable() { return saved_core_api; }
|
||||
void SetCoreApiTable(const CoreApiTable& table) { saved_core_api = table; }
|
||||
|
||||
AmdExtTable saved_amd_ext_api{};
|
||||
AmdExtTable GetAmdExtTable() { return saved_amd_ext_api; }
|
||||
void SetAmdExtTable(AmdExtTable* table) { saved_amd_ext_api = *table; }
|
||||
|
||||
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
|
||||
hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi() { return hsa_loader_api; }
|
||||
void SetHSALoaderApi() {
|
||||
hsa_status_t status = saved_core_api.hsa_system_get_major_extension_table_fn(
|
||||
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed");
|
||||
}
|
||||
|
||||
void ResetMaps() {
|
||||
if (hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false);
|
||||
status != HSA_STATUS_SUCCESS)
|
||||
assert(!"hsa_amd_profiling_async_copy_enable failed");
|
||||
memset(&saved_core_api, '\0', sizeof(saved_core_api));
|
||||
memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api));
|
||||
memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api));
|
||||
}
|
||||
|
||||
rocprofiler_timestamp_t GetCurrentTimestampNS() {
|
||||
// If the HSA intercept is installed, then use the "original"
|
||||
// 'hsa_system_get_info' function to avoid reporting calls for internal use
|
||||
// of the HSA API by the tracer.
|
||||
auto hsa_system_get_info_fn = saved_core_api.hsa_system_get_info_fn;
|
||||
|
||||
// If the HSA intercept is not installed, use the default
|
||||
// 'hsa_system_get_info'.
|
||||
if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info;
|
||||
|
||||
uint64_t sysclock;
|
||||
if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
status == HSA_STATUS_ERROR_NOT_INITIALIZED)
|
||||
return rocprofiler_timestamp_t{0};
|
||||
else if (status != HSA_STATUS_SUCCESS)
|
||||
assert(!"hsa_system_get_info failed");
|
||||
|
||||
static uint64_t sysclock_period = [&]() {
|
||||
uint64_t sysclock_hz = 0;
|
||||
if (hsa_status_t status =
|
||||
hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
status != HSA_STATUS_SUCCESS)
|
||||
assert(!"hsa_system_get_info failed");
|
||||
|
||||
return (uint64_t)1000000000 / sysclock_hz;
|
||||
}();
|
||||
|
||||
return rocprofiler_timestamp_t{sysclock * sysclock_period};
|
||||
}
|
||||
|
||||
} // namespace hsa_support
|
||||
} // namespace rocprofiler
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -29,8 +29,10 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
|
||||
#include "hsa_common.h"
|
||||
#include "rocprofiler.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
|
||||
// HSA EVT data type
|
||||
@@ -92,15 +94,78 @@ typedef struct {
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
namespace hsa_support {
|
||||
|
||||
void Initialize(HsaApiTable* Table);
|
||||
hsa_status_t hsa_iterate_agents_cb(hsa_agent_t agent, void* data);
|
||||
void Finalize();
|
||||
class HSAAgentInfo {
|
||||
private:
|
||||
hsa_agent_t agent_;
|
||||
Agent::DeviceInfo device_info_;
|
||||
hsa_agent_t near_cpu_agent_;
|
||||
hsa_device_type_t type_;
|
||||
|
||||
bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
|
||||
|
||||
} // namespace hsa_support
|
||||
public:
|
||||
HSAAgentInfo(hsa_agent_t agent, hsa_device_type_t type) : agent_(agent), type_(type){};
|
||||
uint64_t getHandle() const;
|
||||
const Agent::DeviceInfo& GetDeviceInfo() const;
|
||||
void SetNearCpuAgent(hsa_agent_t near_cpu_agent);
|
||||
void SetDeviceInfo(Agent::DeviceInfo device_info);
|
||||
hsa_agent_t GetNearCpuAgent() const;
|
||||
hsa_device_type_t GetType() const;
|
||||
hsa_amd_memory_pool_t cpu_pool_;
|
||||
hsa_amd_memory_pool_t kernarg_pool_;
|
||||
hsa_amd_memory_pool_t gpu_pool_;
|
||||
};
|
||||
|
||||
|
||||
struct queues_deleter {
|
||||
queues_deleter() {};
|
||||
queues_deleter(queues_deleter&) { };
|
||||
void operator() (void * queue) const;
|
||||
};
|
||||
|
||||
class HSASupport_Singleton {
|
||||
private:
|
||||
HSASupport_Singleton() {};
|
||||
~HSASupport_Singleton() = delete;
|
||||
CoreApiTable saved_core_api;
|
||||
AmdExtTable saved_amd_ext_api;
|
||||
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api;
|
||||
std::mutex info_map_mutex_;
|
||||
std::unordered_map<uint64_t, HSAAgentInfo> HSAagent_info_map_;
|
||||
std::atomic<bool> ksymbols_flag{true};
|
||||
std::atomic<bool> kernel_names_flag{true};
|
||||
std::mutex queues_mutex_;
|
||||
std::unordered_map<hsa_queue_t*, std::unique_ptr<void, queues_deleter&>> queues;
|
||||
void SetCoreApiTable(CoreApiTable& table);
|
||||
void SetAmdExtTable(AmdExtTable& table);
|
||||
void SetHSALoaderApi();
|
||||
|
||||
public:
|
||||
std::vector<hsa_agent_t> gpu_agents;
|
||||
HSAAgentInfo& GetHSAAgentInfo(uint64_t agent_handle);
|
||||
HSAAgentInfo& GetHSAAgentInfo(Agent::DeviceInfo device_info);
|
||||
Agent::DeviceInfo& GetDeviceInfo(HSAAgentInfo* agent_info);
|
||||
std::mutex kernel_names_map_lock;
|
||||
std::map<std::string, std::vector<uint64_t>>* kernel_names;
|
||||
std::mutex ksymbol_map_lock;
|
||||
std::map<uint64_t, std::string>* ksymbols;
|
||||
void SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info);
|
||||
static HSASupport_Singleton& GetInstance();
|
||||
CoreApiTable& GetCoreApiTable();
|
||||
AmdExtTable& GetAmdExtTable();
|
||||
hsa_ven_amd_loader_1_01_pfn_t& GetHSALoaderApi();
|
||||
void AddQueue(hsa_queue_t* queue, std::unique_ptr<void, queues_deleter&>);
|
||||
void RemoveQueue(hsa_queue_t* queue);
|
||||
void HSAInitialize(HsaApiTable* Table);
|
||||
void HSAFinalize();
|
||||
void InitKsymbols();
|
||||
void FinitKsymbols();
|
||||
HSASupport_Singleton(const HSASupport_Singleton&) = delete;
|
||||
HSASupport_Singleton& operator=(const HSASupport_Singleton&) = delete;
|
||||
|
||||
};
|
||||
|
||||
bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
|
||||
} // namespace rocprofiler
|
||||
|
||||
#include "src/core/session/tracer/src/roctracer.h"
|
||||
@@ -126,11 +191,6 @@ uint32_t GetApiCode(const char* str);
|
||||
|
||||
void RegisterTracerCallback(int (*function)(rocprofiler_tracer_activity_domain_t domain,
|
||||
uint32_t operation_id, void* data));
|
||||
rocprofiler_timestamp_t timestamp_ns();
|
||||
|
||||
void Initialize_roctracer(HsaApiTable* table);
|
||||
|
||||
|
||||
} // namespace roctracer::hsa_support
|
||||
|
||||
#endif // SRC_CORE_HSA_HSA_SUPPORT_H_
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include "src/core/counters/basic/basic_counter.h"
|
||||
#include "src/utils/exception.h"
|
||||
#include "src/utils/logger.h"
|
||||
#include "src/core/hsa/hsa_common.h"
|
||||
|
||||
#include "src/core/counters/metrics/metrics.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
@@ -83,13 +82,14 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info");
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
|
||||
err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info");
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
@@ -114,20 +114,22 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) {
|
||||
void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info) {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsa_status_t status =
|
||||
rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
|
||||
cpu_agent, FindStandardPool, &(agent_info->cpu_pool));
|
||||
hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
|
||||
cpu_agent, FindStandardPool, &(agent_info->cpu_pool_));
|
||||
CHECK_HSA_STATUS("Error: Command Buffer Pool is not initialized", status);
|
||||
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
|
||||
cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool));
|
||||
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(
|
||||
cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool_));
|
||||
CHECK_HSA_STATUS("Error: Output Buffer Pool is not initialized", status);
|
||||
}
|
||||
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) {
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info) {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsa_status_t status =
|
||||
hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, &(agent_info->gpu_pool));
|
||||
hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, &(agent_info->gpu_pool_));
|
||||
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status);
|
||||
}
|
||||
|
||||
@@ -136,13 +138,18 @@ struct block_des_t {
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
std::map<uint32_t, rocprofiler::MetricsDict*> metricsDict;
|
||||
|
||||
static std::atomic<bool> counters_added{false};
|
||||
|
||||
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents) {
|
||||
for (auto& gpu_agent : gpu_agents) {
|
||||
|
||||
std::map<uint32_t, rocprofiler::MetricsDict*> metricsDict;
|
||||
|
||||
|
||||
void CheckPacketReqiurements() {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
for (auto& gpu_agent : hsasupport_singleton.gpu_agents) {
|
||||
// get the instance of MetricsDict
|
||||
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
|
||||
metricsDict[gpu_agent.handle] = rocprofiler::MetricsDict::Create(&agentInfo);
|
||||
}
|
||||
}
|
||||
@@ -155,18 +162,18 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
std::vector<std::string>& counter_names, rocprofiler_session_id_t session_id,
|
||||
bool is_spm) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
if (!counters_added.load(std::memory_order_acquire)) {
|
||||
for (auto& name : counter_names) {
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id)
|
||||
->GetProfiler()
|
||||
->AddCounterName(name);
|
||||
if (rocprofiler_singleton.HasActiveSession()) {
|
||||
rocprofiler_singleton.GetSession(session_id)->GetProfiler()->AddCounterName(name);
|
||||
}
|
||||
}
|
||||
counters_added.exchange(true, std::memory_order_release);
|
||||
}
|
||||
|
||||
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
|
||||
std::map<std::string, rocprofiler::results_t*> results_map;
|
||||
std::vector<rocprofiler::event_t> events_list;
|
||||
std::vector<rocprofiler::results_t*> results_list;
|
||||
@@ -330,8 +337,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
<< "Error: Command buffer given size is " << size << std::endl;
|
||||
abort();
|
||||
}
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.cpu_pool, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.cpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
profile->command_buffer.ptr = malloc(size);
|
||||
/*numa_alloc_onnode(
|
||||
@@ -344,11 +351,10 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
}
|
||||
} else {
|
||||
// Both the CPU and GPU can access the memory
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
ag_list_count, ag_list, NULL, profile->command_buffer.ptr);
|
||||
CHECK_HSA_STATUS("Error: Allowing access to Command Buffer", status);
|
||||
}
|
||||
|
||||
if (!is_spm) {
|
||||
status = HSA_STATUS_ERROR;
|
||||
size_t size = profile->output_buffer.size;
|
||||
@@ -358,8 +364,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
<< "Error: Output buffer given size is " << size << std::endl;
|
||||
abort();
|
||||
}
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.kernarg_pool, size, 0, reinterpret_cast<void**>(&profile->output_buffer.ptr));
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.kernarg_pool_, size, 0, reinterpret_cast<void**>(&profile->output_buffer.ptr));
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
profile->output_buffer.ptr = malloc(size);
|
||||
/*numa_alloc_onnode(
|
||||
@@ -372,7 +378,7 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
ag_list_count, ag_list, NULL, profile->output_buffer.ptr);
|
||||
CHECK_HSA_STATUS("Error: GPU Agent can't have output buffer access", status);
|
||||
memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size);
|
||||
@@ -420,29 +426,34 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
|
||||
|
||||
// Preparing an Getting the size of the command and output buffers
|
||||
status = hsa_ven_amd_aqlprofile_start(profile, NULL);
|
||||
|
||||
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
|
||||
size_t ag_list_count = 1;
|
||||
hsa_agent_t ag_list[ag_list_count];
|
||||
ag_list[0] = gpu_agent;
|
||||
|
||||
// Allocating Command Buffer
|
||||
//FixMe: Command buffer and output buffers are allocated repetatively.
|
||||
status = HSA_STATUS_ERROR;
|
||||
size_t size = profile->command_buffer.size;
|
||||
profile->command_buffer.ptr = nullptr;
|
||||
if (size <= 0) return nullptr;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.cpu_pool, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.cpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->command_buffer.ptr)));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
ag_list_count, ag_list, NULL, profile->command_buffer.ptr);
|
||||
CHECK_HSA_STATUS("Error: GPU Agent can't have command buffer access", status);
|
||||
} else {
|
||||
hsa_agent_t near_cpu_node = agentInfo.GetNearCpuAgent();
|
||||
uint32_t near_cpu_node_id = 0;
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(near_cpu_node,
|
||||
HSA_AGENT_INFO_NODE, &near_cpu_node_id);
|
||||
profile->command_buffer.ptr = numa_alloc_onnode(
|
||||
profile->command_buffer.size,
|
||||
rocprofiler::hsa_support::GetAgentInfo(agentInfo.getNearCpuAgent().handle).getNumaNode());
|
||||
near_cpu_node_id);
|
||||
if (profile->command_buffer.ptr != nullptr) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
} else {
|
||||
@@ -455,12 +466,12 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
|
||||
size = profile->output_buffer.size;
|
||||
profile->output_buffer.ptr = nullptr;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.gpu_pool, size, 0, reinterpret_cast<void**>(&(profile->output_buffer.ptr)));
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
agentInfo.gpu_pool_, size, 0, reinterpret_cast<void**>(&(profile->output_buffer.ptr)));
|
||||
CHECK_HSA_STATUS("Error: Can't Allocate Output Buffer", status);
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
ag_list_count, ag_list, NULL, profile->output_buffer.ptr);
|
||||
CHECK_HSA_STATUS("Error: Can't allow access on the Output Buffer for the GPU", status);
|
||||
memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size);
|
||||
@@ -490,11 +501,12 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(
|
||||
*cpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn(
|
||||
ag_list_count, ag_list, NULL, buffer);
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
@@ -504,32 +516,33 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
|
||||
// Allocate memory for use by a kernel of specified size
|
||||
uint8_t* AllocateLocalMemory(size_t size, hsa_amd_memory_pool_t* gpu_pool) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_amd_memory_pool_allocate(*gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(*gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
size_t att_buffer_size) {
|
||||
Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile, size_t att_buffer_size) {
|
||||
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle);
|
||||
profile->command_buffer.ptr =
|
||||
AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool);
|
||||
AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool_);
|
||||
profile->output_buffer.size = att_buffer_size;
|
||||
profile->output_buffer.ptr = (g_output_buffer_local)
|
||||
? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool)
|
||||
: AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool);
|
||||
? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool_)
|
||||
: AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool_);
|
||||
return (profile->command_buffer.ptr && profile->output_buffer.ptr) ? HSA_STATUS_SUCCESS
|
||||
: HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
bool AllocateMemoryPools(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
hsa_amd_memory_pool_t* cpu_pool, hsa_amd_memory_pool_t* gpu_pool) {
|
||||
hsa_status_t status = hsa_amd_agent_iterate_memory_pools(cpu_agent, FindStandardPool, cpu_pool);
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(cpu_agent, FindStandardPool, cpu_pool);
|
||||
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(cpu_pool)", status);
|
||||
|
||||
status = hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, gpu_pool);
|
||||
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, gpu_pool);
|
||||
CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status);
|
||||
|
||||
return true;
|
||||
|
||||
@@ -46,8 +46,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent,
|
||||
uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool);
|
||||
void GetCommandBufferMap(std::map<size_t, uint8_t*>);
|
||||
void GetOutputBufferMap(std::map<size_t, uint8_t*>);
|
||||
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info);
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info);
|
||||
void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info);
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info);
|
||||
hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets(
|
||||
hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_event_t* events,
|
||||
uint32_t event_count, packet_t* start_packet, packet_t* stop_packet, packet_t* read_packet);
|
||||
@@ -65,8 +65,7 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po
|
||||
|
||||
void get_command_buffer_map(std::map<size_t, uint8_t*>);
|
||||
void get_outbuffer_map(std::map<size_t, uint8_t*>);
|
||||
void initialize_pools(hsa_agent_t cpu_agent);
|
||||
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents);
|
||||
void CheckPacketReqiurements();
|
||||
|
||||
typedef struct {
|
||||
hsa_amd_memory_pool_t cpu_mem_pool;
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "src/utils/helper.h"
|
||||
#include "src/core/isa_capture/code_object_track.hpp"
|
||||
|
||||
|
||||
#define CHECK_HSA_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { \
|
||||
@@ -74,37 +75,37 @@ static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1,
|
||||
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> att_trace_callback_data_t;
|
||||
|
||||
static std::mutex ksymbol_map_lock;
|
||||
static std::map<uint64_t, std::string>* ksymbols;
|
||||
static std::atomic<bool> ksymbols_flag{true};
|
||||
|
||||
void AddKernelName(uint64_t handle, std::string name) {
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
ksymbols->emplace(handle, name);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
|
||||
hsasupport_singleton.ksymbols->emplace(handle, name);
|
||||
}
|
||||
void RemoveKernelName(uint64_t handle) {
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
ksymbols->erase(handle);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
|
||||
hsasupport_singleton.ksymbols->erase(handle);
|
||||
}
|
||||
std::string GetKernelNameFromKsymbols(uint64_t handle) {
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
if (ksymbols->find(handle) != ksymbols->end())
|
||||
return ksymbols->at(handle);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
|
||||
if (hsasupport_singleton.ksymbols->find(handle) != hsasupport_singleton.ksymbols->end())
|
||||
return hsasupport_singleton.ksymbols->at(handle);
|
||||
else
|
||||
return "Unknown Kernel!";
|
||||
}
|
||||
|
||||
static std::mutex kernel_names_map_lock;
|
||||
static std::map<std::string, std::vector<uint64_t>>* kernel_names;
|
||||
static std::atomic<bool> kernel_names_flag{true};
|
||||
void AddKernelNameWithDispatchID(std::string name, uint64_t id) {
|
||||
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
|
||||
if (kernel_names->find(name) == kernel_names->end())
|
||||
kernel_names->emplace(name, std::vector<uint64_t>());
|
||||
kernel_names->at(name).push_back(id);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.kernel_names_map_lock);
|
||||
if (hsasupport_singleton.kernel_names->find(name) == hsasupport_singleton.kernel_names->end())
|
||||
hsasupport_singleton.kernel_names->emplace(name, std::vector<uint64_t>());
|
||||
hsasupport_singleton.kernel_names->at(name).push_back(id);
|
||||
}
|
||||
std::string GetKernelNameUsingDispatchID(uint64_t given_id) {
|
||||
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
|
||||
for (auto kernel_name : (*kernel_names)) {
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.kernel_names_map_lock);
|
||||
for (auto kernel_name : (*hsasupport_singleton.kernel_names)) {
|
||||
for (auto dispatch_id : kernel_name.second) {
|
||||
if (dispatch_id == given_id) return kernel_name.first;
|
||||
}
|
||||
@@ -112,34 +113,6 @@ std::string GetKernelNameUsingDispatchID(uint64_t given_id) {
|
||||
return "Unknown Kernel!";
|
||||
}
|
||||
|
||||
void InitKsymbols() {
|
||||
if (ksymbols_flag.load(std::memory_order_relaxed)) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
ksymbols = new std::map<uint64_t, std::string>();
|
||||
ksymbols_flag.exchange(false, std::memory_order_release);
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
|
||||
kernel_names = new std::map<std::string, std::vector<uint64_t>>();
|
||||
kernel_names_flag.exchange(false, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
}
|
||||
void FinitKsymbols() {
|
||||
if (!ksymbols_flag.load(std::memory_order_relaxed)) {
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
ksymbols->clear();
|
||||
delete ksymbols;
|
||||
ksymbols_flag.exchange(true, std::memory_order_release);
|
||||
}
|
||||
if (!kernel_names_flag.load(std::memory_order_relaxed)) {
|
||||
std::lock_guard<std::mutex> lock(kernel_names_map_lock);
|
||||
kernel_names->clear();
|
||||
delete kernel_names;
|
||||
kernel_names_flag.exchange(true, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct kernel_descriptor_t {
|
||||
@@ -185,7 +158,8 @@ enum amd_kernel_code_property_t {
|
||||
|
||||
static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) {
|
||||
const kernel_descriptor_t* kernel_code = NULL;
|
||||
hsa_status_t status = hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_query_host_address(
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsa_status_t status = hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_query_host_address(
|
||||
reinterpret_cast<const void*>(kernel_object), reinterpret_cast<const void**>(&kernel_code));
|
||||
if (HSA_STATUS_SUCCESS != status) {
|
||||
kernel_code = reinterpret_cast<kernel_descriptor_t*>(kernel_object);
|
||||
@@ -193,8 +167,8 @@ static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) {
|
||||
return kernel_code;
|
||||
}
|
||||
|
||||
static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
|
||||
const std::string_view& name = info.getName();
|
||||
static uint32_t arch_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
|
||||
|
||||
std::string info_name(name.data(), name.size());
|
||||
if (strcmp(name.data(), "gfx90a") == 0 || strncmp(name.data(), "gfx94", 5) == 0)
|
||||
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc3,
|
||||
@@ -210,23 +184,23 @@ static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_
|
||||
? 8
|
||||
: 4);
|
||||
}
|
||||
static uint32_t accum_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
|
||||
const std::string_view& name = info.getName();
|
||||
static uint32_t accum_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
|
||||
|
||||
std::string info_name(name.data(), name.size());
|
||||
if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(info, kernel_code);
|
||||
if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(name, kernel_code);
|
||||
if (strcmp(info_name.c_str(), "gfx90a") == 0 || strncmp(info_name.c_str(), "gfx94", 5) == 0)
|
||||
return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1,
|
||||
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) +
|
||||
1) *
|
||||
8 -
|
||||
arch_vgpr_count(info, kernel_code);
|
||||
arch_vgpr_count(name, kernel_code);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t sgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) {
|
||||
static uint32_t sgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) {
|
||||
// GFX10 and later always allocate 128 sgprs.
|
||||
const std::string_view name = info.getName();
|
||||
|
||||
// TODO(srnagara): Recheck the extraction of gfxip from gpu name
|
||||
const char* name_data = name.data();
|
||||
const size_t gfxip_label_len = std::min(name.size() - 2, size_t{63});
|
||||
@@ -259,10 +233,10 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet
|
||||
kernel_properties_ptr.workgroup_size = (uint32_t)workgroup_size;
|
||||
kernel_properties_ptr.lds_size = packet.group_segment_size;
|
||||
kernel_properties_ptr.scratch_size = packet.private_segment_size;
|
||||
Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(agent.handle);
|
||||
kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info, *kernel_code);
|
||||
kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info, *kernel_code);
|
||||
kernel_properties_ptr.sgpr_count = sgpr_count(agent_info, *kernel_code);
|
||||
HSAAgentInfo agent_info = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle);
|
||||
kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
|
||||
kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
|
||||
kernel_properties_ptr.sgpr_count = sgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code);
|
||||
kernel_properties_ptr.wave_size =
|
||||
AMD_HSA_BITS_GET(kernel_code->kernel_code_properties,
|
||||
AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)
|
||||
@@ -275,9 +249,7 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet
|
||||
|
||||
namespace queue {
|
||||
|
||||
using rocprofiler::GetROCProfilerSingleton;
|
||||
|
||||
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
pmc_callback_data_t* passed_data = reinterpret_cast<pmc_callback_data_t*>(data);
|
||||
@@ -330,7 +302,7 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign
|
||||
rocprofiler_record_counter_value_t{value}});
|
||||
}
|
||||
record->counters = counters;
|
||||
rocprofiler::Session* session = GetROCProfilerSingleton()->GetSession(pending->session_id);
|
||||
rocprofiler::Session* session = rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(pending->session_id);
|
||||
void* initial_handle = const_cast<rocprofiler_record_counter_instance_t*>(record->counters);
|
||||
if (session->FindBuffer(pending->buffer_id)) {
|
||||
Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id);
|
||||
@@ -347,7 +319,8 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign
|
||||
|
||||
void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent,
|
||||
att_pending_signal_t& pending) {
|
||||
Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(gpu_agent.handle);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
HSAAgentInfo agent_info = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle);
|
||||
att_trace_callback_data_t data;
|
||||
hsa_status_t status =
|
||||
hsa_ven_amd_aqlprofile_iterate_data(pending.profile, attTraceDataCallback, &data);
|
||||
@@ -373,11 +346,11 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent
|
||||
void* buffer = NULL;
|
||||
if (data_size != 0) {
|
||||
// Allocate buffer on CPU to copy out trace data
|
||||
buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool);
|
||||
buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool_);
|
||||
if (buffer == NULL) fatal("Trace data buffer allocation failed");
|
||||
|
||||
auto status = rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr,
|
||||
data_size);
|
||||
auto status =
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed");
|
||||
|
||||
record->shader_engine_data[se_index].buffer_ptr = buffer;
|
||||
@@ -392,12 +365,13 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent
|
||||
|
||||
bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
auto queue_info_session = static_cast<queue_info_session_t*>(data);
|
||||
if (!queue_info_session || !GetROCProfilerSingleton() ||
|
||||
!GetROCProfilerSingleton()->GetSession(queue_info_session->session_id) ||
|
||||
!GetROCProfilerSingleton()->GetSession(queue_info_session->session_id)->GetProfiler())
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
if (!queue_info_session ||
|
||||
!rocprofiler_singleton.GetSession(queue_info_session->session_id) ||
|
||||
!rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetProfiler())
|
||||
return true;
|
||||
rocprofiler::Session* session =
|
||||
GetROCProfilerSingleton()->GetSession(queue_info_session->session_id);
|
||||
rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id);
|
||||
std::lock_guard<std::mutex> lock(session->GetSessionLock());
|
||||
rocprofiler::profiler::Profiler* profiler = session->GetProfiler();
|
||||
std::vector<pending_signal_t*> pending_signals = const_cast<std::vector<pending_signal_t*>&>(
|
||||
@@ -407,10 +381,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
for (auto it = pending_signals.begin(); it != pending_signals.end();
|
||||
it = pending_signals.erase(it)) {
|
||||
auto& pending = *it;
|
||||
if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal))
|
||||
return true;
|
||||
if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) return true;
|
||||
hsa_amd_profiling_dispatch_time_t time;
|
||||
hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
|
||||
hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn(
|
||||
queue_info_session->agent, pending->original_signal, &time);
|
||||
uint32_t record_count = 1;
|
||||
bool is_individual_xcc_mode = false;
|
||||
@@ -440,7 +413,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
record.correlation_id = rocprofiler_correlation_id_t{pending->correlation_id};
|
||||
|
||||
if (pending->session_id.handle == 0) {
|
||||
pending->session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
|
||||
pending->session_id = rocprofiler_singleton.GetCurrentSessionId();
|
||||
}
|
||||
if (pending->counters_count > 0) {
|
||||
if (xcc_id == 0 && pending->context && pending->context->metrics_list.size() > 0 &&
|
||||
@@ -456,7 +429,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
pending->context->metrics_list,
|
||||
time.end - time.start);
|
||||
AddRecordCounters(&record, pending);
|
||||
} else {
|
||||
}else {
|
||||
if (session->FindBuffer(pending->buffer_id)) {
|
||||
Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id);
|
||||
buffer->AddRecord(record);
|
||||
@@ -467,13 +440,12 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
// TODO(aelwazir): we need a better way of distributing events and free them
|
||||
// if (pending->profile->output_buffer.ptr)
|
||||
// numa_free(pending->profile->output_buffer.ptr, pending->profile->output_buffer.size);
|
||||
hsa_status_t status =
|
||||
rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
(pending->profile->output_buffer.ptr));
|
||||
hsa_status_t status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
(pending->profile->output_buffer.ptr));
|
||||
CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status);
|
||||
// if (pending->profile->command_buffer.ptr)
|
||||
// numa_free(pending->profile->command_buffer.ptr, pending->profile->command_buffer.size);
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
(pending->profile->command_buffer.ptr));
|
||||
CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status);
|
||||
delete pending->profile;
|
||||
@@ -483,9 +455,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
delete pending->context;
|
||||
}
|
||||
if (pending->new_signal.handle)
|
||||
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal);
|
||||
if (queue_info_session->interrupt_signal.handle)
|
||||
hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal);
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal);
|
||||
}
|
||||
}
|
||||
delete queue_info_session;
|
||||
@@ -496,15 +468,13 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) {
|
||||
bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
|
||||
|
||||
auto queue_info_session = static_cast<queue_info_session_t*>(data);
|
||||
if (!queue_info_session || !GetROCProfilerSingleton())
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
if (!queue_info_session ||
|
||||
!rocprofiler_singleton.GetSession(queue_info_session->session_id) ||
|
||||
!rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetAttTracer())
|
||||
return true;
|
||||
|
||||
rocprofiler::Session* session =
|
||||
GetROCProfilerSingleton()->GetSession(queue_info_session->session_id);
|
||||
if (!session) return true;
|
||||
|
||||
std::lock_guard<std::mutex> lock(session->GetSessionLock());
|
||||
|
||||
rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id);
|
||||
rocprofiler::att::AttTracer* att_tracer = session->GetAttTracer();
|
||||
if (!session->GetAttTracer()) return true;
|
||||
|
||||
@@ -516,9 +486,8 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
|
||||
for (auto it = pending_signals.begin(); it != pending_signals.end();
|
||||
it = pending_signals.erase(it)) {
|
||||
auto& pending = *it;
|
||||
if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal))
|
||||
return true;
|
||||
|
||||
std::lock_guard<std::mutex> lock(session->GetSessionLock());
|
||||
if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) return true;
|
||||
rocprofiler_record_att_tracer_t record{};
|
||||
record.kernel_id = rocprofiler_kernel_id_t{pending.kernel_descriptor};
|
||||
record.gpu_id = rocprofiler_agent_id_t{(uint64_t)queue_info_session->gpu_index};
|
||||
@@ -540,7 +509,7 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
|
||||
if (pending.session_id.handle == 0) {
|
||||
pending.session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
|
||||
pending.session_id = rocprofiler_singleton.GetCurrentSessionId();
|
||||
}
|
||||
if (session->FindBuffer(pending.buffer_id)) {
|
||||
Memory::GenericBuffer* buffer = session->GetBuffer(pending.buffer_id);
|
||||
@@ -549,10 +518,10 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) {
|
||||
}
|
||||
codeobj_record::free_capture(record.header.id);
|
||||
|
||||
hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
(pending.profile->output_buffer.ptr));
|
||||
CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status);
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn(
|
||||
(pending.profile->command_buffer.ptr));
|
||||
CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status);
|
||||
delete pending.profile;
|
||||
@@ -580,20 +549,20 @@ void AddVendorSpecificPacket(const Packet::packet_t* packet,
|
||||
}
|
||||
|
||||
void SignalAsyncHandler(const hsa_signal_t& signal, void* data) {
|
||||
hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn(
|
||||
hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn(
|
||||
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, data);
|
||||
CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler failed", status);
|
||||
}
|
||||
|
||||
void signalAsyncHandlerATT(const hsa_signal_t& signal, void* data) {
|
||||
hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn(
|
||||
hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn(
|
||||
signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandlerATT, data);
|
||||
CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler for ATT failed", status);
|
||||
}
|
||||
|
||||
void CreateSignal(uint32_t attribute, hsa_signal_t* signal) {
|
||||
hsa_status_t status =
|
||||
hsa_support::GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
|
||||
HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal);
|
||||
CHECK_HSA_STATUS("Error: hsa_amd_signal_create failed", status);
|
||||
}
|
||||
|
||||
@@ -635,17 +604,16 @@ void ResetSessionID(rocprofiler_session_id_t id) { session_id = id; }
|
||||
|
||||
void CheckNeededProfileConfigs() {
|
||||
rocprofiler_session_id_t internal_session_id;
|
||||
if (GetROCProfilerSingleton())
|
||||
// Getting Session ID
|
||||
internal_session_id = GetROCProfilerSingleton()->GetCurrentSessionId();
|
||||
else
|
||||
internal_session_id = {0};
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
internal_session_id = rocprofiler_singleton.GetCurrentSessionId();
|
||||
|
||||
|
||||
if (session_id.handle == 0 || internal_session_id.handle != session_id.handle) {
|
||||
session_id = internal_session_id;
|
||||
// Getting Counters count from the Session
|
||||
if (session_id.handle > 0 && GetROCProfilerSingleton()) {
|
||||
session = GetROCProfilerSingleton()->GetSession(session_id);
|
||||
if (session_id.handle > 0 ) {
|
||||
session = rocprofiler_singleton.GetSession(session_id);
|
||||
if (session && session->FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) {
|
||||
rocprofiler_filter_id_t filter_id =
|
||||
session->GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION);
|
||||
@@ -690,9 +658,9 @@ std::pair<std::vector<bool>, bool> GetAllowedProfilesList(const void* packets, i
|
||||
std::vector<bool> can_profile_packet;
|
||||
bool b_can_profile_anypacket = false;
|
||||
can_profile_packet.reserve(pkt_count);
|
||||
|
||||
std::lock_guard<std::mutex> lock(ksymbol_map_lock);
|
||||
assert(ksymbols);
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
std::lock_guard<std::mutex> lock(hsasupport_singleton.ksymbol_map_lock);
|
||||
assert(hsasupport_singleton.ksymbols);
|
||||
|
||||
uint32_t current_writer_id = WRITER_ID.load(std::memory_order_relaxed);
|
||||
|
||||
@@ -710,7 +678,7 @@ std::pair<std::vector<bool>, bool> GetAllowedProfilesList(const void* packets, i
|
||||
for (auto id : kernel_profile_dispatch_ids) b_profile_this_object |= id == current_writer_id;
|
||||
try {
|
||||
// Can throw
|
||||
const std::string& kernel_name = ksymbols->at(kdispatch.kernel_object);
|
||||
const std::string& kernel_name = hsasupport_singleton.ksymbols->at(kdispatch.kernel_object);
|
||||
|
||||
// If no filters specified, auto profile this kernel
|
||||
if (kernel_profile_names.size() == 0 && kernel_profile_dispatch_ids.size() == 0 &&
|
||||
@@ -739,7 +707,7 @@ ProcessATTParams(
|
||||
Packet::packet_t& start_packet,
|
||||
Packet::packet_t& stop_packet,
|
||||
Queue& queue_info,
|
||||
Agent::AgentInfo& agentInfo
|
||||
rocprofiler::HSAAgentInfo& agentInfo
|
||||
) {
|
||||
std::vector<hsa_ven_amd_aqlprofile_parameter_t> att_params;
|
||||
int num_att_counters = 0;
|
||||
@@ -805,7 +773,7 @@ ProcessATTParams(
|
||||
* pointer to the packet. This packet is written into the queue by this
|
||||
* interceptor by invoking the writer function.
|
||||
*/
|
||||
void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data,
|
||||
void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer) {
|
||||
|
||||
static const char* env_MAX_ATT_PROFILES = getenv("ROCPROFILER_MAX_ATT_PROFILES");
|
||||
@@ -882,7 +850,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
|
||||
rocprofiler_kernel_properties_t kernel_properties =
|
||||
set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent());
|
||||
if (session) {
|
||||
uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId();
|
||||
uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
|
||||
AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object),
|
||||
record_id);
|
||||
if (session_data_count > 0 && profile.second) {
|
||||
@@ -936,19 +904,21 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
|
||||
(reinterpret_cast<Packet::packet_t*>(&barrier));
|
||||
transformed_packets.emplace_back(*pkt);
|
||||
}
|
||||
Agent::AgentInfo& agentInfo =
|
||||
rocprofiler::hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle);
|
||||
rocprofiler::HSAAgentInfo& agentInfo =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
|
||||
// Creating Async Handler to be called every time the interrupt signal is
|
||||
// marked complete
|
||||
SignalAsyncHandler(
|
||||
interrupt_signal,
|
||||
new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot,
|
||||
queue_info.GetQueueID(), writer_id, interrupt_signal,
|
||||
agentInfo.getIndex(), agentInfo.getXccCount()});
|
||||
new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, queue_info.GetQueueID(),
|
||||
writer_id, interrupt_signal, agentInfo.GetDeviceInfo().getGPUId(),
|
||||
agentInfo.GetDeviceInfo().getXccCount()});
|
||||
ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_add(1, std::memory_order_relaxed);
|
||||
// profile_id++;
|
||||
// } while (replay_mode_count > 0 && profile_id < replay_mode_count); // Profiles loop end
|
||||
|
||||
}
|
||||
|
||||
/* Write the transformed packets to the hardware queue. */
|
||||
writer(&transformed_packets[0], transformed_packets.size());
|
||||
} else if (session_id_snapshot.handle > 0 && pkt_count > 0 && is_att_collection_mode && session &&
|
||||
@@ -957,7 +927,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
|
||||
// Getting Queue Data and Information
|
||||
auto& queue_info = *static_cast<Queue*>(data);
|
||||
std::lock_guard<std::mutex> lk(queue_info.qw_mutex);
|
||||
Agent::AgentInfo agentInfo = hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle);
|
||||
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
|
||||
|
||||
bool can_profile_anypacket = false;
|
||||
std::vector<bool> can_profile_packet;
|
||||
@@ -1021,7 +991,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
|
||||
// list to be processed by the signal interrupt
|
||||
rocprofiler_kernel_properties_t kernel_properties =
|
||||
set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent());
|
||||
uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId();
|
||||
uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId();
|
||||
AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object),
|
||||
record_id);
|
||||
|
||||
@@ -1080,31 +1050,13 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt
|
||||
/* Write the original packets to the hardware queue if no profiling session
|
||||
* is active */
|
||||
writer(packets, pkt_count);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Queue::Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
||||
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue)
|
||||
: cpu_agent_(cpu_agent), gpu_agent_(gpu_agent) {
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_create_fn(
|
||||
gpu_agent, size, type, callback, data, private_segment_size, group_segment_size,
|
||||
&intercept_queue_);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
status = hsa_support::GetAmdExtTable().hsa_amd_profiling_set_profiler_enabled_fn(intercept_queue_,
|
||||
true);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_register_fn(intercept_queue_,
|
||||
WriteInterceptor, this);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
*queue = intercept_queue_;
|
||||
}
|
||||
Queue::Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent, hsa_queue_t* queue)
|
||||
: cpu_agent_(cpu_agent), gpu_agent_(gpu_agent), intercept_queue_(queue) { }
|
||||
|
||||
Queue::~Queue() {
|
||||
while (ACTIVE_INTERRUPT_SIGNAL_COUNT.load(std::memory_order_acquire) > 0) {
|
||||
@@ -1119,15 +1071,8 @@ hsa_agent_t Queue::GetCPUAgent() { return cpu_agent_; }
|
||||
|
||||
uint64_t Queue::GetQueueID() { return intercept_queue_->id; }
|
||||
|
||||
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) {
|
||||
Packet::InitializePools(cpu_agent, agent_info);
|
||||
}
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) {
|
||||
Packet::InitializeGPUPool(gpu_agent, agent_info);
|
||||
}
|
||||
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents) {
|
||||
Packet::CheckPacketReqiurements(gpu_agents);
|
||||
}
|
||||
void CheckPacketReqiurements() {
|
||||
Packet::CheckPacketReqiurements();}
|
||||
|
||||
} // namespace queue
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -39,8 +39,7 @@
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
void InitKsymbols();
|
||||
void FinitKsymbols();
|
||||
|
||||
void AddKernelName(uint64_t handle, std::string kernel_name);
|
||||
void RemoveKernelName(uint64_t handle);
|
||||
void AddKernelNameWithDispatchID(std::string name, uint64_t id);
|
||||
@@ -52,12 +51,12 @@ namespace queue {
|
||||
|
||||
class Queue {
|
||||
public:
|
||||
Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
||||
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue);
|
||||
Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent,
|
||||
hsa_queue_t* queue);
|
||||
~Queue();
|
||||
|
||||
static void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index,
|
||||
void* data, hsa_amd_queue_intercept_packet_writer writer);
|
||||
hsa_queue_t* GetCurrentInterceptQueue();
|
||||
hsa_agent_t GetGPUAgent();
|
||||
hsa_agent_t GetCPUAgent();
|
||||
@@ -69,7 +68,6 @@ class Queue {
|
||||
std::mutex mutex_;
|
||||
hsa_agent_t cpu_agent_;
|
||||
hsa_agent_t gpu_agent_;
|
||||
hsa_queue_t* original_queue_;
|
||||
hsa_queue_t* intercept_queue_;
|
||||
|
||||
hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
@@ -88,12 +86,10 @@ struct queue_info_session_t {
|
||||
|
||||
void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_signal_t& pending);
|
||||
|
||||
void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info);
|
||||
void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info);
|
||||
void CheckPacketReqiurements(std::vector<hsa_agent_t>& gpu_agents);
|
||||
|
||||
void ResetSessionID(rocprofiler_session_id_t id = rocprofiler_session_id_t{0});
|
||||
|
||||
void CheckPacketReqiurements();
|
||||
|
||||
} // namespace queue
|
||||
} // namespace rocprofiler
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6
|
||||
uint64_t mem_size) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
codeobjs[addr] = std::make_shared<codeobj_capture_instance>(
|
||||
addr, URI, mem_addr, mem_size, rocprofiler::GetCurrentTimestamp().value);
|
||||
addr, URI, mem_addr, mem_size, rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value);
|
||||
std::atomic_thread_fence(std::memory_order_release); // Fencing the state of the map
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(codeobj_record::mutex);
|
||||
@@ -87,7 +87,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6
|
||||
|
||||
void codeobj_capture_instance::Unload(uint64_t addr) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
codeobjs.at(addr)->end_time = rocprofiler::GetCurrentTimestamp().value;
|
||||
codeobjs.at(addr)->end_time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
|
||||
codeobjs.erase(addr);
|
||||
}
|
||||
|
||||
|
||||
@@ -69,8 +69,9 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
|
||||
GenericBuffer::~GenericBuffer() {
|
||||
if (is_valid_.load(std::memory_order_acquire)) {
|
||||
std::lock_guard lock(buffer_lock_);
|
||||
// if (rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_))
|
||||
// rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)->DisableTools(id_);
|
||||
//rocprofiler::ROCProfiler_Singleton& instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
//if (instance.GetSession(session_id_))
|
||||
// instance.GetSession(session_id_)->DisableTools(id_);
|
||||
|
||||
Flush();
|
||||
|
||||
|
||||
@@ -35,17 +35,17 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
|
||||
pci_system_initialized_(pci_system_init() == 0)
|
||||
|
||||
{
|
||||
params_ = rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(session_id_)
|
||||
params_ = rocprofiler::ROCProfiler_Singleton::GetInstance()
|
||||
.GetSession(session_id_)
|
||||
->GetFilter(filter_id_)
|
||||
->GetCountersSamplerParameterData();
|
||||
|
||||
std::vector<hsa_agent_t> agents;
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
|
||||
HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* arg) {
|
||||
auto& agents = *reinterpret_cast<std::vector<hsa_agent_t>*>(arg);
|
||||
const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle);
|
||||
if (ai.getType() == HSA_DEVICE_TYPE_GPU) {
|
||||
const auto& ai = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle);
|
||||
if (ai.GetType() == HSA_DEVICE_TYPE_GPU) {
|
||||
agents.emplace_back(agent);
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -62,8 +62,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
|
||||
}
|
||||
|
||||
if (pcie_counter_names.size() > 0) {
|
||||
auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
|
||||
if (agentInfo.getName() == "gfx90a") {
|
||||
auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle);
|
||||
if (agentInfo.GetDeviceInfo().getName()== "gfx90a") {
|
||||
PciePerfMonMI200* perfmon = new PciePerfMonMI200(agentInfo);
|
||||
perfmon->SetCounterNames(pcie_counter_names);
|
||||
perfmon_instances_.push_back(perfmon);
|
||||
@@ -77,8 +77,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id,
|
||||
}
|
||||
|
||||
if (xgmi_counter_names.size() > 0) {
|
||||
auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle);
|
||||
if (agentInfo.getName() == "gfx90a") {
|
||||
auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle);
|
||||
if (agentInfo.GetDeviceInfo().getName() == "gfx90a") {
|
||||
DFPerfMonMI200* perfmon = new DFPerfMonMI200(agentInfo);
|
||||
perfmon->SetCounterNames(xgmi_counter_names);
|
||||
perfmon_instances_.push_back(perfmon);
|
||||
@@ -132,13 +132,13 @@ void CountersSampler::Stop() {
|
||||
}
|
||||
|
||||
void CountersSampler::AddRecord(rocprofiler_record_counters_sampler_t& record) {
|
||||
const auto tool = rocprofiler::GetROCProfilerSingleton();
|
||||
const auto session = tool->GetSession(session_id_);
|
||||
rocprofiler::ROCProfiler_Singleton& tool = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
const auto session = tool.GetSession(session_id_);
|
||||
const auto buffer = session->GetBuffer(buffer_id_);
|
||||
|
||||
std::lock_guard<std::mutex> lk(session->GetSessionLock());
|
||||
|
||||
record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool->GetUniqueRecordId()}};
|
||||
record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool.GetUniqueRecordId()}};
|
||||
|
||||
// Add the record to the buffer(a deep-copy operation) along with
|
||||
// a lambda function to deep-copy the record.counters member to
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "src/utils/exception.h"
|
||||
#include "src/core/hsa/queues/queue.h"
|
||||
// #include "src/core/counters/rdc/rdc_metrics.h"
|
||||
#include "src/core/hsa/hsa_common.h"
|
||||
|
||||
|
||||
#include <exception>
|
||||
#include <typeinfo>
|
||||
@@ -183,8 +183,8 @@ DeviceProfileSession::DeviceProfileSession(std::vector<std::string> profiling_da
|
||||
if (hsa_agent_get_info(gpu_agent_, HSA_AGENT_INFO_NAME, gpu_name) != HSA_STATUS_SUCCESS)
|
||||
fatal("Agent name query failed");
|
||||
|
||||
Agent::AgentInfo* agentInfo = &(hsa_support::GetAgentInfo(gpu_agent_.handle));
|
||||
metrics_dict_ = MetricsDict::Create(agentInfo);
|
||||
HSAAgentInfo agentInfo = (HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent_.handle));
|
||||
metrics_dict_ = MetricsDict::Create(&agentInfo);
|
||||
|
||||
for (auto& d : profiling_data_) {
|
||||
Metric* metric = const_cast<Metric*>(metrics_dict_->Get(d));
|
||||
|
||||
@@ -157,7 +157,7 @@ std::mutex processQueueLock;
|
||||
// std::vector<uint64_t> timestamp_vec;
|
||||
// // Get Buffer
|
||||
// rocprofiler::Session* session =
|
||||
// rocprofiler::GetROCProfilerSingleton()->GetSession(rocprofiler::GetROCProfilerSingleton()->GetCurrentSessionId());
|
||||
// rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(rocprofiler::rocmtool::GetInstance().GetCurrentSessionId());
|
||||
// rocprofiler_filter_id_t filter_id = session->GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION);
|
||||
// rocprofiler::Filter* filter = session->GetFilter(filter_id);
|
||||
// rocprofiler_buffer_id_t buffer_id = filter->GetBufferId();
|
||||
@@ -188,8 +188,7 @@ std::mutex processQueueLock;
|
||||
// }
|
||||
// se++;
|
||||
// }
|
||||
// record.header.id =
|
||||
// rocprofiler_record_id_t{rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()};
|
||||
// record.header.id = rocprofiler_record_id_t{rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId()};
|
||||
// buffer->AddRecord(record);
|
||||
// nSample++;
|
||||
// index += 160;
|
||||
@@ -241,11 +240,10 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) {
|
||||
|
||||
// advance command queue
|
||||
const uint64_t write_idx =
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue,
|
||||
1);
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, 1);
|
||||
while ((write_idx -
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(
|
||||
queue)) >= queue->size) {
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(queue)) >=
|
||||
queue->size) {
|
||||
sched_yield(); // TODO: remove
|
||||
}
|
||||
|
||||
@@ -263,8 +261,8 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) {
|
||||
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
|
||||
|
||||
// ringdoor bell
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal,
|
||||
write_idx);
|
||||
rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal,
|
||||
write_idx);
|
||||
|
||||
return write_idx;
|
||||
}
|
||||
@@ -290,7 +288,7 @@ hsa_signal_value_t signalWait(const hsa_signal_t& signal, const hsa_signal_value
|
||||
// Probably a maximum wait time should be set. We don't want application to hang because of
|
||||
// unlimited wait.
|
||||
// TODO2 : try 500000 assuming nanosecond granularity -- must be verified.
|
||||
ret_value = rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_wait_scacquire_fn(
|
||||
ret_value = rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_wait_scacquire_fn(
|
||||
signal, HSA_SIGNAL_CONDITION_LT, signal_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
if (ret_value == exp_value) break;
|
||||
@@ -321,9 +319,9 @@ spm::SpmCounters::SpmCounters(rocprofiler_buffer_id_t buffer_id, rocprofiler_fil
|
||||
|
||||
// create signals
|
||||
hsa_status_t status =
|
||||
hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_);
|
||||
HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed");
|
||||
status = hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_);
|
||||
status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed");
|
||||
is_started.store(false, std::memory_order_relaxed);
|
||||
buffer_read_flag.store(false, std::memory_order_relaxed);
|
||||
@@ -417,7 +415,7 @@ rocprofiler_status_t spm::SpmCounters::stopSpm() {
|
||||
hsa_signal_store_screlease(stop_signal_, 1);
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (queue_ != nullptr) {
|
||||
status = hsa_support::GetCoreApiTable().hsa_queue_destroy_fn(queue_);
|
||||
status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_destroy_fn(queue_);
|
||||
queue_ = nullptr;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) rocprofiler::warning("Queue destroy failed");
|
||||
|
||||
@@ -237,34 +237,33 @@ template <activity_domain_t domain> struct ApiTracer {
|
||||
};
|
||||
|
||||
static void Exit(OperationId operation_id, TraceData* trace_data) {
|
||||
if (rocprofiler::GetROCProfilerSingleton()) {
|
||||
if (auto pool = activity_table.Get(operation_id)) {
|
||||
if (rocprofiler::GetROCProfilerSingleton() &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton =
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (auto pool = activity_table.Get(operation_id)) {
|
||||
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
|
||||
rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)) {
|
||||
if (rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
if (rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->IsValid()) {
|
||||
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
std::lock_guard<std::mutex> lock(rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->GetBufferLock());
|
||||
assert(trace_data != nullptr);
|
||||
rocprofiler_record_tracer_t record{};
|
||||
record.header = rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
|
||||
record.domain = domain;
|
||||
record.operation_id = rocprofiler_tracer_operation_id_t{operation_id};
|
||||
record.correlation_id =
|
||||
rocprofiler_tracer_activity_correlation_id_t{trace_data->api_data.correlation_id};
|
||||
record.timestamps = rocprofiler_record_header_timestamp_t{
|
||||
rocprofiler_timestamp_t{trace_data->phase_enter_timestamp},
|
||||
hsa_support::timestamp_ns()};
|
||||
rocprofiler_singleton.timestamp_ns()};
|
||||
record.thread_id = rocprofiler_thread_id_t{GetTid()};
|
||||
record.phase = ROCPROFILER_PHASE_NONE;
|
||||
|
||||
@@ -272,8 +271,7 @@ template <activity_domain_t domain> struct ApiTracer {
|
||||
rocprofiler_record_tracer_t ext_record{};
|
||||
ext_record.header = rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
|
||||
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
|
||||
ext_record.operation_id =
|
||||
rocprofiler_tracer_operation_id_t{ACTIVITY_EXT_OP_EXTERN_ID};
|
||||
@@ -283,21 +281,20 @@ template <activity_domain_t domain> struct ApiTracer {
|
||||
ext_record.phase = ROCPROFILER_PHASE_NONE;
|
||||
// Write the external correlation id record directly followed by the
|
||||
// activity record.
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(std::array<rocprofiler_record_tracer_t, 2>{ext_record, record});
|
||||
} else {
|
||||
// Write record to the buffer.
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
CorrelationIdPop();
|
||||
}
|
||||
|
||||
@@ -332,7 +329,7 @@ template <activity_domain_t domain> struct ApiTracer {
|
||||
trace_data->api_data.correlation_id = CorrelationIdPush();
|
||||
|
||||
if (activity_enabled) {
|
||||
trace_data->phase_enter_timestamp = hsa_support::timestamp_ns().value;
|
||||
trace_data->phase_enter_timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
|
||||
trace_data->phase_enter = nullptr;
|
||||
trace_data->phase_exit = Exit;
|
||||
}
|
||||
@@ -365,6 +362,7 @@ ActivityRegistrationTable<ACTIVITY_DOMAIN_HSA_OPS, IsStopped> hsa_ops_activity_t
|
||||
CallbackRegistrationTable<ACTIVITY_DOMAIN_HSA_EVT, IsStopped> hsa_evt_callback_table;
|
||||
|
||||
int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) {
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return HSA_ApiTracer::Enter(static_cast<HSA_ApiTracer::OperationId>(operation_id),
|
||||
@@ -380,50 +378,47 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
|
||||
// and make the record point to it. Older HIP runtimes do not provide a kernel name,
|
||||
// so record.kernel_name might be null.
|
||||
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
|
||||
if (rocprofiler::GetROCProfilerSingleton() &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)) {
|
||||
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->GetBufferLock());
|
||||
rocprofiler_record_tracer_t rocprofiler_record{};
|
||||
rocprofiler_record.header = rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
|
||||
rocprofiler_record.domain = domain;
|
||||
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{};
|
||||
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind};
|
||||
rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{};
|
||||
rocprofiler_record.correlation_id =
|
||||
rocprofiler_tracer_activity_correlation_id_t{record->correlation_id};
|
||||
rocprofiler_record.timestamps = rocprofiler_record_header_timestamp_t{
|
||||
rocprofiler_timestamp_t{record->begin_ns}, rocprofiler_timestamp_t{record->end_ns}};
|
||||
rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id};
|
||||
rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id};
|
||||
rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()};
|
||||
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
|
||||
if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) {
|
||||
rocprofiler_record.name = record->kernel_name;
|
||||
size_t kernel_name_size = (strlen(record->kernel_name) + 1);
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size,
|
||||
[](auto& rocprofiler_record, const void* data) {
|
||||
rocprofiler_record.name = static_cast<const char*>(data);
|
||||
});
|
||||
} else {
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record);
|
||||
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
|
||||
rocprofiler_singleton.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)) {
|
||||
std::lock_guard<std::mutex> lock(
|
||||
rocprofiler_singleton.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->GetBufferLock());
|
||||
|
||||
rocprofiler_record_tracer_t rocprofiler_record{};
|
||||
rocprofiler_record.header = rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
|
||||
rocprofiler_record.domain = domain;
|
||||
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{};
|
||||
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind};
|
||||
rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{};
|
||||
rocprofiler_record.correlation_id =
|
||||
rocprofiler_tracer_activity_correlation_id_t{record->correlation_id};
|
||||
rocprofiler_record.timestamps =
|
||||
rocprofiler_record_header_timestamp_t{rocprofiler_timestamp_t{record->begin_ns},
|
||||
rocprofiler_timestamp_t{record->end_ns}};
|
||||
rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id};
|
||||
rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id};
|
||||
rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()};
|
||||
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
|
||||
if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) {
|
||||
rocprofiler_record.name = record->kernel_name;
|
||||
size_t kernel_name_size = (strlen(record->kernel_name) + 1);
|
||||
|
||||
rocprofiler_singleton.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size,
|
||||
[](auto& rocprofiler_record, const void* data) {
|
||||
rocprofiler_record.name = static_cast<const char*>(data);
|
||||
});
|
||||
} else {
|
||||
rocprofiler_singleton.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -438,19 +433,18 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
user_callback->second);
|
||||
return 0;
|
||||
} else {
|
||||
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
|
||||
if (rocprofiler::GetROCProfilerSingleton() &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(
|
||||
|
||||
if (
|
||||
rocprofiler_singleton.GetSession(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id) &&
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(
|
||||
rocprofiler_singleton.GetSession(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
|
||||
->GetBuffer(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)) {
|
||||
if (auto api_data = static_cast<DomainTraits<ACTIVITY_DOMAIN_ROCTX>::ApiData*>(data)) {
|
||||
std::lock_guard<std::mutex> lock(
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(
|
||||
rocprofiler_singleton.
|
||||
GetSession(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
|
||||
->GetBuffer(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)
|
||||
@@ -460,13 +454,13 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{api_data ? api_data->args.id : 0},
|
||||
ACTIVITY_DOMAIN_ROCTX,
|
||||
rocprofiler_tracer_operation_id_t{operation_id},
|
||||
tracer_api_data,
|
||||
rocprofiler_tracer_activity_correlation_id_t{0},
|
||||
rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(),
|
||||
rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(),
|
||||
rocprofiler_timestamp_t{0}},
|
||||
0,
|
||||
0,
|
||||
@@ -477,8 +471,8 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
if (api_data && api_data->args.message) {
|
||||
message_size = strlen(api_data->args.message) + 1;
|
||||
}
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession(
|
||||
rocprofiler_singleton
|
||||
.GetSession(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->session_id)
|
||||
->GetBuffer(
|
||||
reinterpret_cast<session_buffer_id_t*>(user_callback->second)->buffer_id)
|
||||
@@ -496,21 +490,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (auto pool = hsa_ops_activity_table.Get(operation_id)) {
|
||||
if (auto record = static_cast<activity_record_t*>(data)) {
|
||||
if (!rocprofiler::GetROCProfilerSingleton()) return 0;
|
||||
if (rocprofiler::GetROCProfilerSingleton() &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) &&
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
if (rocprofiler_singleton.GetSession((*pool)->session_id) &&
|
||||
rocprofiler_singleton.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)) {
|
||||
std::lock_guard<std::mutex> lock(rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
std::lock_guard<std::mutex> lock(rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->GetBufferLock());
|
||||
rocprofiler_record_tracer_t rocprofiler_record{};
|
||||
rocprofiler_record.header = rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}};
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}};
|
||||
rocprofiler_record.domain = domain;
|
||||
rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{0};
|
||||
rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->op};
|
||||
@@ -525,16 +515,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data)
|
||||
rocprofiler_record.phase = ROCPROFILER_PHASE_NONE;
|
||||
if (record->kernel_name != nullptr && record->op == HSA_OP_ID_DISPATCH) {
|
||||
size_t kernel_name_size = strlen(record->kernel_name) + 1;
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
|
||||
rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record, record->kernel_name, kernel_name_size,
|
||||
[](auto& rocprofiler_record, const void* data) {
|
||||
rocprofiler_record.name = static_cast<const char*>(data);
|
||||
});
|
||||
} else {
|
||||
rocprofiler::GetROCProfilerSingleton()
|
||||
->GetSession((*pool)->session_id)
|
||||
rocprofiler_singleton
|
||||
.GetSession((*pool)->session_id)
|
||||
->GetBuffer((*pool)->buffer_id)
|
||||
->AddRecord(rocprofiler_record);
|
||||
}
|
||||
|
||||
@@ -116,9 +116,10 @@ std::mutex& Tracer::GetTracerLock() { return tracer_lock_; }
|
||||
void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_data, void* args) {
|
||||
api_callback_data_t* args_data = reinterpret_cast<api_callback_data_t*>(args);
|
||||
rocprofiler_tracer_api_data_t api_data{};
|
||||
if (args_data && rocprofiler::GetROCProfilerSingleton() &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id) &&
|
||||
rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id)->GetTracer()) {
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
if (args_data &&
|
||||
rocprofiler_singleton.GetSession(args_data->session_id) &&
|
||||
rocprofiler_singleton.GetSession(args_data->session_id)->GetTracer()) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
@@ -127,12 +128,12 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
|
||||
rocprofiler_record_tracer_t{
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{data ? data->args.id : 0}, ACTIVITY_DOMAIN_ROCTX,
|
||||
rocprofiler_tracer_operation_id_t{cid}, api_data,
|
||||
|
||||
rocprofiler_tracer_activity_correlation_id_t{0},
|
||||
rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(),
|
||||
rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(),
|
||||
rocprofiler_timestamp_t{0}},
|
||||
0, 0, GetTid(), ROCPROFILER_PHASE_ENTER},
|
||||
args_data->session_id);
|
||||
@@ -147,8 +148,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
|
||||
rocprofiler_record_tracer_t{
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API,
|
||||
rocprofiler_tracer_operation_id_t{cid}, api_data,
|
||||
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
|
||||
@@ -161,8 +161,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
|
||||
rocprofiler_record_tracer_t{
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API,
|
||||
rocprofiler_tracer_operation_id_t{cid}, api_data,
|
||||
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
|
||||
@@ -182,8 +181,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
|
||||
rocprofiler_record_tracer_t{
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API,
|
||||
rocprofiler_tracer_operation_id_t{cid}, api_data,
|
||||
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
|
||||
@@ -196,8 +194,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d
|
||||
rocprofiler_record_tracer_t{
|
||||
rocprofiler_record_header_t{
|
||||
ROCPROFILER_TRACER_RECORD,
|
||||
rocprofiler_record_id_t{
|
||||
rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}},
|
||||
rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}},
|
||||
rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API,
|
||||
rocprofiler_tracer_operation_id_t{cid}, api_data,
|
||||
rocprofiler_tracer_activity_correlation_id_t{data->correlation_id},
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "src/api/rocprofiler_singleton.h"
|
||||
#include "src/pcsampler/session/pc_sampler.h"
|
||||
#include "src/pcsampler/gfxip/gfxip.h"
|
||||
#include "src/core/hsa/hsa_common.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
|
||||
namespace rocprofiler::pc_sampler {
|
||||
@@ -59,17 +58,18 @@ void PCSampler::Start() {
|
||||
using agents_t = std::vector<hsa_agent_t>;
|
||||
|
||||
agents_t agents;
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void* arg) {
|
||||
auto& agents = *reinterpret_cast<agents_t*>(arg);
|
||||
agents.emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance();
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_iterate_agents_fn(
|
||||
[](hsa_agent_t agent, void *arg){
|
||||
auto &agents = *reinterpret_cast<agents_t *>(arg);
|
||||
agents.emplace_back(agent);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agents);
|
||||
|
||||
for (const auto& agent : agents) {
|
||||
const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle);
|
||||
if (ai.getType() != HSA_DEVICE_TYPE_GPU) {
|
||||
for (const auto &agent : agents) {
|
||||
const auto& ai = hsasupport_singleton.GetHSAAgentInfo(agent.handle);
|
||||
if (ai.GetType() != HSA_DEVICE_TYPE_GPU) {
|
||||
continue;
|
||||
}
|
||||
devices_.emplace(agent.handle, gfxip::device_t{pci_system_initialized_, ai});
|
||||
@@ -89,13 +89,13 @@ void PCSampler::Stop() {
|
||||
}
|
||||
|
||||
void PCSampler::AddRecord(rocprofiler_record_pc_sample_t& record) {
|
||||
const auto tool = rocprofiler::GetROCProfilerSingleton();
|
||||
const auto session = tool->GetSession(session_id_);
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
const auto session = rocprofiler_instance.GetSession(session_id_);
|
||||
const auto buffer = session->GetBuffer(buffer_id_);
|
||||
|
||||
std::lock_guard<std::mutex> lk(session->GetSessionLock());
|
||||
|
||||
record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {tool->GetUniqueRecordId()}};
|
||||
record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {rocprofiler_instance.GetUniqueRecordId()}};
|
||||
buffer->AddRecord(record);
|
||||
}
|
||||
|
||||
|
||||
@@ -127,10 +127,10 @@ uint32_t debugfs_ioctl_read_register(const device_t& dev,
|
||||
return value;
|
||||
}
|
||||
|
||||
device_t::device_t(const bool pci_inited, const Agent::AgentInfo& info)
|
||||
device_t::device_t(const bool pci_inited, const HSAAgentInfo& info)
|
||||
: agent_info_(info), pci_memory_(nullptr) {
|
||||
const auto pci_domain = agent_info_.getPCIDomain();
|
||||
const auto pci_location_id = agent_info_.getPCILocationID();
|
||||
const auto pci_domain = agent_info_.GetDeviceInfo().getPCIDomain();
|
||||
const auto pci_location_id = agent_info_.GetDeviceInfo().getPCILocationID();
|
||||
|
||||
std::string name([pci_domain, pci_location_id]() {
|
||||
std::ostringstream out;
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include "src/utils/handle.h"
|
||||
|
||||
#include <pciaccess.h>
|
||||
@@ -99,14 +100,14 @@ static constexpr int HWIP_MAX_INSTANCE = 11;
|
||||
(REG_FIELD_MASK(reg, field) & ((field_val) << REG_FIELD_SHIFT(reg, field))))
|
||||
|
||||
struct device_t {
|
||||
device_t(const bool pci_inited, const Agent::AgentInfo& agent_info);
|
||||
device_t(const bool pci_inited, const HSAAgentInfo& agent_info);
|
||||
~device_t();
|
||||
|
||||
device_t(const device_t&) = delete;
|
||||
device_t& operator=(const device_t&) = delete;
|
||||
device_t(device_t&&) = default;
|
||||
|
||||
const Agent::AgentInfo& agent_info_;
|
||||
const HSAAgentInfo& agent_info_;
|
||||
|
||||
struct pci_device* pci_device_;
|
||||
size_t pci_memory_size_;
|
||||
|
||||
@@ -104,8 +104,8 @@ void fill_record(const device_t& dev, rocprofiler_record_pc_sample_t* record, ui
|
||||
* comment in rocprofiler::hsa_support::Initialize about using KFD's gpu_id for
|
||||
* more information.
|
||||
*/
|
||||
record->pc_sample.gpu_id =
|
||||
rocprofiler_agent_id_t{(uint64_t)rocprofiler::hsa_support::GetAgentInfo(hdl).getIndex()};
|
||||
record->pc_sample.gpu_id = rocprofiler_agent_id_t{
|
||||
HSASupport_Singleton::GetInstance().GetHSAAgentInfo(hdl).GetDeviceInfo().getGPUId()};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -116,9 +116,9 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) {
|
||||
uint32_t saved_grbm_gfx_index = dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)];
|
||||
uint32_t data;
|
||||
|
||||
for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se)
|
||||
for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh)
|
||||
for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) {
|
||||
for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se)
|
||||
for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh)
|
||||
for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) {
|
||||
// Select the SE, SH, and CU.
|
||||
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, cu);
|
||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh);
|
||||
@@ -126,8 +126,8 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) {
|
||||
dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)] = data;
|
||||
|
||||
// Iterate over all the waves in the compute unit.
|
||||
for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd)
|
||||
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) {
|
||||
for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd)
|
||||
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) {
|
||||
// FatalHalt the wave
|
||||
data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT);
|
||||
data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE);
|
||||
@@ -204,16 +204,16 @@ void read_pc_samples_v9_ioctl(const device_t& dev, PCSampler* sampler) {
|
||||
|
||||
uint32_t data;
|
||||
|
||||
for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se)
|
||||
for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh)
|
||||
for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) {
|
||||
for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se)
|
||||
for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh)
|
||||
for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) {
|
||||
ioc.grbm.se = se;
|
||||
ioc.grbm.sh = sh;
|
||||
ioc.grbm.instance = cu;
|
||||
|
||||
// Iterate over all the waves in the compute unit.
|
||||
for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd)
|
||||
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) {
|
||||
for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd)
|
||||
for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) {
|
||||
// FatalHalt the wave
|
||||
data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT);
|
||||
data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE);
|
||||
|
||||
@@ -5,8 +5,9 @@ add_custom_target(
|
||||
check
|
||||
COMMAND ${PROJECT_BINARY_DIR}/run_tests.sh
|
||||
DEPENDS tests)
|
||||
add_subdirectory(unittests)
|
||||
add_subdirectory(featuretests)
|
||||
add_subdirectory(memorytests)
|
||||
add_subdirectory(microbenchmarks)
|
||||
add_subdirectory(HSAToolLibrary)
|
||||
add_subdirectory(unittests)
|
||||
configure_file(run_tests.sh ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# ##############################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
# ##############################################################################
|
||||
|
||||
|
||||
|
||||
# Setup unit testing env
|
||||
# Handle HSA Interception Tool Tests
|
||||
find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH})
|
||||
|
||||
find_package(
|
||||
Clang REQUIRED CONFI
|
||||
PATHS "${ROCM_PATH}"
|
||||
PATH_SUFFIXES "llvm/lib/cmake/clang")
|
||||
|
||||
file(GLOB TEST_HSATOOl_SRC_FILES ${PROJECT_SOURCE_DIR}/tests-v2/HSAToolLibrary/*.cpp)
|
||||
|
||||
add_library(test_hsatool_library SHARED ${TEST_HSATOOl_SRC_FILES})
|
||||
|
||||
target_include_directories(test_hsatool_library PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
set_target_properties(
|
||||
test_hsatool_library
|
||||
PROPERTIES CXX_VISIBILITY_PRESET hidden
|
||||
DEFINE_SYMBOL TEST_HSA_TOOL_EXPORTS
|
||||
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/tests-v2
|
||||
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
|
||||
|
||||
install(
|
||||
TARGETS test_hsatool_library LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/${ROCPROFILER_NAME}
|
||||
COMPONENT tests)
|
||||
|
||||
target_link_libraries(test_hsatool_library PRIVATE hsa-runtime64::hsa-runtime64)
|
||||
+28
-34
@@ -18,47 +18,41 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_CORE_HSA_HSA_COMMON_H_
|
||||
#define SRC_CORE_HSA_HSA_COMMON_H_
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
|
||||
#define ASSERTM(exp, msg) assert(((void)msg, exp))
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace hsa_support {
|
||||
#include "HSATool.h"
|
||||
extern "C" {
|
||||
|
||||
|
||||
std::vector<hsa_agent_t>& GetCPUAgentList();
|
||||
/*
|
||||
@brief The HSA_AMD_TOOL_PRIORITY variable must be a constant value type
|
||||
initialized by the loader itself, not by code during _init. 'extern const'
|
||||
seems do that although that is not a guarantee.
|
||||
*/
|
||||
|
||||
Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle);
|
||||
void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info);
|
||||
hsa_agent_t GetAgentByIndex(uint64_t agent_index);
|
||||
TEST_HSA_TOOL_API extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
|
||||
static rocprofiler_onload_callback rocprofiler_onload_callback_call = nullptr;
|
||||
|
||||
CoreApiTable& GetCoreApiTable();
|
||||
void SetCoreApiTable(const CoreApiTable& table);
|
||||
/*
|
||||
|
||||
AmdExtTable GetAmdExtTable();
|
||||
void SetAmdExtTable(AmdExtTable* table);
|
||||
@brief Callback function called upon loading the HSA.
|
||||
The function updates the core api table function pointers to point to the
|
||||
interceptor functions in this file.
|
||||
|
||||
hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi();
|
||||
void SetHSALoaderApi();
|
||||
*/
|
||||
|
||||
void ResetMaps();
|
||||
TEST_HSA_TOOL_API bool OnLoad(void* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
rocprofiler_onload_callback_call(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
return true;
|
||||
}
|
||||
|
||||
rocprofiler_timestamp_t GetCurrentTimestampNS();
|
||||
/*
|
||||
@brief Callback function upon unloading the HSA.
|
||||
*/
|
||||
|
||||
} // namespace hsa_support
|
||||
} // namespace rocprofiler
|
||||
TEST_HSA_TOOL_API void OnUnload() { printf("\n\nTool is getting unloaded\n\n"); }
|
||||
|
||||
#endif // SRC_CORE_HSA_HSA_COMMON_H_
|
||||
} // extern "C"
|
||||
|
||||
TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback) {
|
||||
rocprofiler_onload_callback_call = callback;
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
/* Placeholder for calling convention and import/export macros */
|
||||
#if !defined(TEST_HSA_TOOL_EXPORT_CALL)
|
||||
#define TEST_HSA_TOOL_EXPORT_CALL
|
||||
#endif /* !defined (TEST_HSA_TOOL_EXPORT_CALL) */
|
||||
|
||||
#if !defined(TEST_HSA_TOOL_EXPORT_DECORATOR)
|
||||
#if defined(__GNUC__)
|
||||
#define TEST_HSA_TOOL_EXPORT_DECORATOR __attribute__((visibility("default")))
|
||||
#elif defined(_MSC_VER)
|
||||
#define TEST_HSA_TOOL_EXPORT_DECORATOR __declspec(dllexport)
|
||||
#endif /* defined (_MSC_VER) */
|
||||
#endif /* !defined (TEST_HSA_TOOL_EXPORT_DECORATOR) */
|
||||
|
||||
#if !defined(TEST_HSA_TOOL_IMPORT_DECORATOR)
|
||||
#if defined(__GNUC__)
|
||||
#define TEST_HSA_TOOL_IMPORT_DECORATOR
|
||||
#elif defined(_MSC_VER)
|
||||
#define TEST_HSA_TOOL_IMPORT_DECORATOR __declspec(dllimport)
|
||||
#endif /* defined (_MSC_VER) */
|
||||
#endif /* !defined (TEST_HSA_TOOL_IMPORT_DECORATOR) */
|
||||
#define TEST_HSA_TOOL_EXPORT TEST_HSA_TOOL_EXPORT_DECORATOR TEST_HSA_TOOL_EXPORT_CALL
|
||||
#define TEST_HSA_TOOL_IMPORT TEST_HSA_TOOL_IMPORT_DECORATOR TEST_HSA_TOOL_IMPORT_CALL
|
||||
#if defined(TEST_HSA_TOOL_EXPORTS)
|
||||
#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT
|
||||
#else /* !defined (TEST_HSA_TOOL_EXPORTS) */
|
||||
#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT
|
||||
#endif /* !defined (TEST_HSA_TOOL_EXPORTS) */
|
||||
typedef int (*rocprofiler_onload_callback)(
|
||||
void* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names);
|
||||
|
||||
|
||||
TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback);
|
||||
@@ -1,17 +1,14 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
//#include "src/core/hsa/hsa_common.h"
|
||||
|
||||
// Entry Point for Gtests Infra
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
testing::FLAGS_gtest_death_test_style = "threadsafe";
|
||||
// Add line below to disable any problematic test
|
||||
hsa_init();
|
||||
testing::GTEST_FLAG(filter) =
|
||||
"-OpenMPTest.*:ProfilerSPMTest.*:ProfilerMQTest.*:ProfilerMPTest.*:MPITest.*";
|
||||
// Disable ATT test fir gfx10 GPUs until its supported
|
||||
hsa_init();
|
||||
// iterate for gpu's
|
||||
hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void*) {
|
||||
@@ -25,8 +22,9 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr);
|
||||
// hsa_shut_down(); // Waiting for hsa_shutdown bug to fix
|
||||
// Append filter above to disable any problematic test
|
||||
return RUN_ALL_TESTS();
|
||||
nullptr);
|
||||
// Append filter above to disable any problematic test
|
||||
int res = RUN_ALL_TESTS();
|
||||
hsa_shut_down();
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,11 @@ CURRENT_DIR="$( dirname -- "$0"; )";
|
||||
echo -e "Running Profiler Tests"
|
||||
|
||||
echo -e "running unit tests for rocprofiler"
|
||||
eval ${CURRENT_DIR}/tests-v2/unittests/runUnitTests
|
||||
eval ${CURRENT_DIR}/tests-v2/unittests/core/runCoreUnitTests
|
||||
|
||||
|
||||
echo -e "running unit tests for rocprofiler"
|
||||
eval ${CURRENT_DIR}/tests-v2/unittests/profiler/runUnitTests
|
||||
|
||||
echo -e "running feature tests for rocprofiler"
|
||||
eval ${CURRENT_DIR}/tests-v2/featuretests/profiler/runFeatureTests
|
||||
|
||||
@@ -1,110 +1,2 @@
|
||||
# Setup unit testing env
|
||||
|
||||
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
|
||||
|
||||
enable_testing()
|
||||
find_package(GTest REQUIRED)
|
||||
|
||||
find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED)
|
||||
|
||||
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
|
||||
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
|
||||
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
|
||||
|
||||
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
|
||||
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
|
||||
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
|
||||
|
||||
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
|
||||
file(GLOB ROCPROFILER_TRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
|
||||
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
|
||||
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_CLASS_FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
|
||||
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
|
||||
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
|
||||
|
||||
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
|
||||
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
|
||||
|
||||
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
|
||||
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
|
||||
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
|
||||
|
||||
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
|
||||
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
|
||||
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
|
||||
|
||||
# Compiling gtests
|
||||
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
|
||||
add_executable(
|
||||
runUnitTests
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp
|
||||
${CORE_MEMORY_SRC_FILES}
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_HW_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${ROCPROFILER_SPM_SRC_FILES}
|
||||
${ROCPROFILER_SRC_FILES}
|
||||
${CORE_HSA_SRC_FILES}
|
||||
${CORE_HSA_PACKETS_SRC_FILES}
|
||||
${CORE_COUNTERS_SRC_FILES}
|
||||
${CORE_HSA_QUEUES_SRC_FILES}
|
||||
${ROCPROFILER_TRACER_SRC_FILES}
|
||||
${ROCPROFILER_ROCTRACER_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_PC_SAMPLING_FILES})
|
||||
|
||||
target_include_directories(
|
||||
runUnitTests
|
||||
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
|
||||
${PROJECT_BINARY_DIR}/rocprofiler)
|
||||
|
||||
target_compile_definitions(
|
||||
runUnitTests
|
||||
PUBLIC AMD_INTERNAL_BUILD
|
||||
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_link_libraries(
|
||||
runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
|
||||
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl)
|
||||
|
||||
add_dependencies(tests runUnitTests)
|
||||
install(TARGETS runUnitTests
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
|
||||
COMPONENT tests)
|
||||
add_test(AllTests runUnitTests)
|
||||
add_subdirectory(profiler)
|
||||
add_subdirectory(core)
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
# ##############################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
# ##############################################################################
|
||||
|
||||
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
|
||||
|
||||
enable_testing()
|
||||
find_package(GTest REQUIRED)
|
||||
|
||||
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
|
||||
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
|
||||
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
|
||||
|
||||
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
|
||||
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
|
||||
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
|
||||
|
||||
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
|
||||
file(GLOB ROCPROFILER_TRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
|
||||
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
|
||||
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_CLASS_FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
|
||||
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
|
||||
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
|
||||
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
|
||||
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
|
||||
|
||||
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
|
||||
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
|
||||
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
|
||||
|
||||
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
|
||||
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
|
||||
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
|
||||
|
||||
# Compiling gtests
|
||||
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
set(GTEST_MAIN_DIR ${PROJECT_SOURCE_DIR}/tests-v2/unittests/core)
|
||||
file(GLOB GTEST_MAIN_SRC_FILE ${GTEST_MAIN_DIR}/gtests_main.cpp)
|
||||
add_executable(
|
||||
runCoreUnitTests
|
||||
${CORE_MEMORY_SRC_FILES}
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_HW_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${ROCPROFILER_SPM_SRC_FILES}
|
||||
${ROCPROFILER_SRC_FILES}
|
||||
${CORE_HSA_SRC_FILES}
|
||||
${CORE_HSA_PACKETS_SRC_FILES}
|
||||
${CORE_COUNTERS_SRC_FILES}
|
||||
${CORE_HSA_QUEUES_SRC_FILES}
|
||||
${ROCPROFILER_TRACER_SRC_FILES}
|
||||
${ROCPROFILER_ROCTRACER_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_PC_SAMPLING_FILES}
|
||||
${GTEST_MAIN_SRC_FILE}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/HSASingleton/HSASingleton_unittests.cpp
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
runCoreUnitTests
|
||||
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
|
||||
${PROJECT_BINARY_DIR}/rocprofiler)
|
||||
|
||||
target_compile_definitions(
|
||||
runCoreUnitTests
|
||||
PUBLIC AMD_INTERNAL_BUILD
|
||||
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
|
||||
target_link_libraries(
|
||||
runCoreUnitTests PRIVATE rocprofiler_tool test_hsatool_library ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
|
||||
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES})
|
||||
|
||||
|
||||
add_dependencies(tests runCoreUnitTests)
|
||||
install(TARGETS runCoreUnitTests
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
|
||||
COMPONENT tests)
|
||||
add_test(AllTests runCoreUnitTests)
|
||||
+139
@@ -0,0 +1,139 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
#include <stdlib.h>
|
||||
#include "api/rocprofiler_singleton.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#define MAX_THREADS 10000
|
||||
struct devices_t {
|
||||
std::vector<hsa_agent_t> cpu_devices;
|
||||
std::vector<hsa_agent_t> gpu_devices;
|
||||
std::vector<hsa_agent_t> other_devices;
|
||||
};
|
||||
|
||||
hsa_status_t device_cb_tool(hsa_agent_t agent, void* data) {
|
||||
hsa_device_type_t device_type;
|
||||
devices_t* devices = reinterpret_cast<devices_t*>(data);
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type) != HSA_STATUS_SUCCESS) {
|
||||
std::cout << "hsa_iterate_agents failed" << std::endl;
|
||||
std::exit(-1);
|
||||
}
|
||||
switch (device_type) {
|
||||
case HSA_DEVICE_TYPE_CPU:
|
||||
devices->cpu_devices.push_back(agent);
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_GPU:
|
||||
devices->gpu_devices.push_back(agent);
|
||||
break;
|
||||
default:
|
||||
devices->other_devices.push_back(agent);
|
||||
break;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
void get_hsa_agents_list_tool(devices_t* device_list) {
|
||||
// Enumerate the agents.
|
||||
if (hsa_iterate_agents(device_cb_tool, device_list) != HSA_STATUS_SUCCESS) {
|
||||
std::cout << "hsa_iterate_agents failed" << std::endl;
|
||||
std::exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class TestHSASupportSingleton {
|
||||
public:
|
||||
uint64_t ref_address;
|
||||
TestHSASupportSingleton() {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
ref_address = reinterpret_cast<long int>(&hsasupport_singleton);
|
||||
}
|
||||
};
|
||||
|
||||
void instanciateHSASupportSingleton(int index, uint64_t *ref_array) {
|
||||
TestHSASupportSingleton test;
|
||||
*(ref_array+index) = test.ref_address;
|
||||
}
|
||||
|
||||
//Add more threads here
|
||||
TEST(WhenInvokingHSASingleton, HSASupportSingletonInstanciation) {
|
||||
std::vector<std::thread> threads;
|
||||
uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS));
|
||||
for(int i = 0; i < MAX_THREADS; i++) {
|
||||
threads.emplace_back(instanciateHSASupportSingleton, i, &refaddress[0]);
|
||||
}
|
||||
for (auto&& thread : threads) thread.join();
|
||||
uint64_t ref_addr = refaddress[0];
|
||||
for(int i = 1; i < MAX_THREADS; i++)
|
||||
EXPECT_EQ(ref_addr, refaddress[i]) << "HSASingleton Instanciation failed";
|
||||
}
|
||||
|
||||
|
||||
TEST(WhenInvokingGetHSAInitialize, TestHSASupportSingleton) {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
devices_t device_list;
|
||||
get_hsa_agents_list_tool(&device_list);
|
||||
for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) {
|
||||
[[maybe_unused]]rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
|
||||
}
|
||||
EXPECT_EQ(hsasupport_singleton.gpu_agents.size(), device_list.gpu_devices.size()) << "HSAInitialize failed";
|
||||
}
|
||||
|
||||
TEST(WhenInvokingGetHSAAgentInfo, TestHSASupportSingleton) {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
devices_t device_list;
|
||||
get_hsa_agents_list_tool(&device_list);
|
||||
for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) {
|
||||
rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle);
|
||||
uint32_t gpu_id;
|
||||
char name[64];
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(
|
||||
*it, (hsa_agent_info_t)(HSA_AMD_AGENT_INFO_DRIVER_UID), &gpu_id);
|
||||
hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(*it, HSA_AGENT_INFO_NAME, name);
|
||||
|
||||
EXPECT_EQ(agent_info.GetDeviceInfo().getGPUId(), gpu_id) << "HSAAgentInfo has incorrect gpu id for the agent: " << it->handle;
|
||||
EXPECT_EQ(strcmp(agent_info.GetDeviceInfo().getName().data(), name), 0) << "HSAAgentInfo has incorrect gpu name for the agent: " << it->handle;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(WhenInvokingQueueInterceptors, TestQueueInterceptors) {
|
||||
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsa_queue_t* queue1 = nullptr, *queue2 = nullptr;
|
||||
|
||||
hsa_status_t status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, &queue1);
|
||||
|
||||
|
||||
EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue create interceptor failed";
|
||||
status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, &queue2);
|
||||
status = hsa_queue_destroy(queue1);
|
||||
EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue destroy interceptor failed";
|
||||
}
|
||||
|
||||
+207
@@ -0,0 +1,207 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
|
||||
#include <experimental/filesystem>
|
||||
#include "api/rocprofiler_singleton.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
using namespace std::string_literals;
|
||||
|
||||
#define MAX_THREADS 10000
|
||||
TEST(WhenTestingDeviceInfo, TestFailFatal) {
|
||||
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
|
||||
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
|
||||
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
|
||||
fs::path node_path = dirp_entry.path();
|
||||
long long node_id = std::stoll(std::string(dirp_entry.path().stem().string()));
|
||||
fs::path gpu_path = node_path / "gpu_id";
|
||||
std::ifstream gpu_id_file(gpu_path);
|
||||
std::string gpu_id_str;
|
||||
long long gpu_id = 0;
|
||||
if (gpu_id_file.is_open()) {
|
||||
gpu_id_file >> gpu_id_str;
|
||||
if (!gpu_id_str.empty()) {
|
||||
gpu_id = std::stoll(gpu_id_str);
|
||||
if (gpu_id == 0) {
|
||||
EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, gpu_id), "");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(WhenTestingDeviceInfo, DeviceInfoReadSuccessfully) {
|
||||
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
|
||||
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
|
||||
uint64_t gpu_id = 0;
|
||||
uint32_t wave_front_size = 0;
|
||||
uint32_t location_id = 0;
|
||||
uint32_t domain = 0;
|
||||
uint32_t shader_arrays_per_se = 0;
|
||||
[[maybe_unused]] uint32_t max_waves_per_simd = 0;
|
||||
uint32_t array_count = 0;
|
||||
uint32_t se_num = 0;
|
||||
uint32_t cu_num = 0;
|
||||
uint32_t cu_per_simd_array = 0;
|
||||
uint32_t simd_per_cu = 0;
|
||||
uint64_t unique_gpu_id = 0;
|
||||
uint32_t xcc_num = 1;
|
||||
uint32_t compute_units_per_sh = 0;
|
||||
long long topology_id = 0;
|
||||
uint32_t waves_per_cu = 0;
|
||||
uint32_t wave_slots_per_simd = 0;
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler_instance =
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
for (auto const& dirp_entry : fs::directory_iterator{dirp}) {
|
||||
fs::path node_path = dirp_entry.path();
|
||||
topology_id = std::stoll(dirp_entry.path().stem().string());
|
||||
fs::path gpu_path = node_path / "gpu_id";
|
||||
std::ifstream gpu_id_file(gpu_path.c_str());
|
||||
std::string gpu_id_str;
|
||||
if (gpu_id_file.is_open()) {
|
||||
gpu_id_file >> gpu_id_str;
|
||||
if (!gpu_id_str.empty()) {
|
||||
gpu_id = std::stoll(gpu_id_str);
|
||||
if (gpu_id > 0) {
|
||||
const Agent::DeviceInfo& device_info = rocprofiler_instance.GetDeviceInfo(gpu_id);
|
||||
fs::path properties_path = node_path / "properties";
|
||||
std::ifstream props_ifs(properties_path);
|
||||
if (!props_ifs.is_open())
|
||||
rocprofiler::fatal("Could not open %s/properties", properties_path.c_str());
|
||||
std::string prop_name;
|
||||
uint64_t prop_value;
|
||||
EXPECT_TRUE(gpu_id == device_info.getGPUId());
|
||||
while (props_ifs >> prop_name >> prop_value) {
|
||||
if (prop_name == "wave_front_size")
|
||||
wave_front_size = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "array_count")
|
||||
array_count = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "simd_per_cu")
|
||||
simd_per_cu = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "location_id")
|
||||
location_id = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "domain")
|
||||
domain = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "simd_arrays_per_engine")
|
||||
shader_arrays_per_se = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "max_waves_per_simd")
|
||||
max_waves_per_simd = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "cu_per_simd_array")
|
||||
cu_per_simd_array = static_cast<uint32_t>(prop_value);
|
||||
else if (prop_name == "unique_id")
|
||||
unique_gpu_id = static_cast<uint64_t>(prop_value);
|
||||
else if (prop_name == "num_xcc")
|
||||
xcc_num = static_cast<uint32_t>(prop_value);
|
||||
}
|
||||
se_num = array_count / shader_arrays_per_se;
|
||||
cu_num = cu_per_simd_array * array_count;
|
||||
waves_per_cu = 8 * simd_per_cu;
|
||||
compute_units_per_sh = cu_num / (se_num * shader_arrays_per_se);
|
||||
wave_slots_per_simd = waves_per_cu / simd_per_cu;
|
||||
|
||||
|
||||
EXPECT_TRUE(wave_front_size == device_info.getMaxWaveSize())
|
||||
<< "Device Info has incorrect wave_front_size ";
|
||||
EXPECT_TRUE(simd_per_cu == device_info.getSimdCountPerCU())
|
||||
<< "Device Info has incorrect simd_per_cu";
|
||||
EXPECT_TRUE(location_id == device_info.getPCILocationID())
|
||||
<< "Device Info has incorrect location_id";
|
||||
EXPECT_TRUE(se_num == device_info.getShaderEngineCount())
|
||||
<< "Device Info has incorrect se_num";
|
||||
EXPECT_TRUE(waves_per_cu == device_info.getMaxWavesPerCU())
|
||||
<< "Device Info has incorrect waves_per_cu";
|
||||
|
||||
EXPECT_TRUE(domain == device_info.getPCIDomain()) << "Device Info has incorrect domain";
|
||||
EXPECT_TRUE(shader_arrays_per_se == device_info.getShaderArraysPerSE())
|
||||
<< "Device Info has incorrect shader_arrays_per_se";
|
||||
EXPECT_TRUE(cu_num == device_info.getCUCount()) << "Device Info has incorrect cu_num";
|
||||
EXPECT_TRUE(compute_units_per_sh == device_info.getCUCountPerSH())
|
||||
<< "Device Info has incorrect compute_units_per_sh ";
|
||||
EXPECT_TRUE(wave_slots_per_simd == device_info.getWaveSlotsPerSimd())
|
||||
<< "Device Info has incorrect wave_slots_per_simd ";
|
||||
EXPECT_TRUE(unique_gpu_id == device_info.getUniqueGPUId())
|
||||
<< "Device Info has incorrect unique_gpu_id ";
|
||||
EXPECT_TRUE(xcc_num == device_info.getXccCount()) << "Device Info has incorrect xcc_num ";
|
||||
;
|
||||
EXPECT_TRUE(static_cast<uint32_t>(topology_id) == device_info.getNumaNode())
|
||||
<< "Device Info has incorrect topology_id";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(WhenTestingDeviceInfo, GetDeviceInfoFail) {
|
||||
fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes");
|
||||
fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes";
|
||||
if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str());
|
||||
uint64_t node_id = 0;
|
||||
for ([[maybe_unused]] auto const& dirp_entry : fs::directory_iterator{dirp}) {
|
||||
node_id++;
|
||||
}
|
||||
node_id++;
|
||||
fs::path node_path = sysfs_nodes_path / std::to_string(node_id);
|
||||
std::stringstream error;
|
||||
error << "Could not opendir `" << node_path.c_str() << "'";
|
||||
|
||||
EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, 1), error.str().c_str());
|
||||
}
|
||||
|
||||
|
||||
class TestRocprofilerSingleton {
|
||||
public:
|
||||
uintptr_t ref_address;
|
||||
TestRocprofilerSingleton() {
|
||||
rocprofiler::ROCProfiler_Singleton& rocprofiler =
|
||||
rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
ref_address = reinterpret_cast<long int>(&rocprofiler);
|
||||
}
|
||||
};
|
||||
|
||||
void instantiateRocprofiler(uint64_t *target) {
|
||||
TestRocprofilerSingleton test;
|
||||
*target = test.ref_address;
|
||||
|
||||
}
|
||||
|
||||
|
||||
//Add more threads here
|
||||
TEST(WhenInvokingRocprofilerSingleton, RocprofilerSingletonInstanciation) {
|
||||
std::vector<std::thread> threads;
|
||||
uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS));
|
||||
for(int i = 0; i < MAX_THREADS; i++) {
|
||||
threads.emplace_back(instantiateRocprofiler, &refaddress[i]);
|
||||
}
|
||||
for (auto&& thread : threads) thread.join();
|
||||
uint64_t ref_addr = refaddress[0];
|
||||
for(int i = 1; i < MAX_THREADS; i++)
|
||||
EXPECT_EQ(ref_addr, refaddress[i]) << "RocprofilerSingleton Instanciation failed";
|
||||
free(refaddress);
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
#include "api/rocprofiler_singleton.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include "tests-v2/HSAToolLibrary/HSATool.h"
|
||||
|
||||
// used for dl_addr to locate the running
|
||||
// path for executable
|
||||
int main(int argc, char** argv);
|
||||
std::string metrics_path;
|
||||
std::string running_path;
|
||||
|
||||
bool is_installed_path() {
|
||||
std::string path;
|
||||
char* real_path;
|
||||
Dl_info dl_info;
|
||||
|
||||
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
|
||||
path = dl_info.dli_fname;
|
||||
real_path = realpath(path.c_str(), NULL);
|
||||
if (real_path == nullptr) {
|
||||
throw(std::string("Error! in extracting real path"));
|
||||
}
|
||||
path.clear(); // reset path
|
||||
path.append(real_path);
|
||||
if (path.find("/opt") != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void init_test_path() {
|
||||
metrics_path = "libexec/rocprofiler/counters/derived_counters.xml";
|
||||
if (is_installed_path()) {
|
||||
running_path = "share/rocprofiler/tests/runCoreUnitTests";
|
||||
} else {
|
||||
running_path = "tests-v2/unittests/core/runCoreUnitTests";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// This function returns the running path of executable
|
||||
std::string GetRunningPath(std::string string_to_erase) {
|
||||
std::string path;
|
||||
const char* real_path;
|
||||
Dl_info dl_info;
|
||||
|
||||
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
|
||||
std::string to_erase = string_to_erase;
|
||||
path = dl_info.dli_fname;
|
||||
real_path = realpath(path.c_str(), NULL);
|
||||
if (real_path == nullptr) {
|
||||
throw(std::string("Error! in extracting real path"));
|
||||
}
|
||||
path.clear(); // reset path
|
||||
path.append(real_path);
|
||||
|
||||
size_t pos = path.find(to_erase);
|
||||
if (pos != std::string::npos) path.erase(pos, to_erase.length());
|
||||
} else {
|
||||
throw(std::string("Error! in extracting real path"));
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
int HSATool_onload_callback(void* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
rocprofiler::HSASupport_Singleton& hsasupport_singleton =
|
||||
rocprofiler::HSASupport_Singleton::GetInstance();
|
||||
hsasupport_singleton.HSAInitialize(reinterpret_cast<HsaApiTable*>(table));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
init_test_path();
|
||||
SetHSACallback(HSATool_onload_callback);
|
||||
metrics_path = "libexec/rocprofiler/counters/derived_counters.xml";
|
||||
std::string app_path = GetRunningPath(running_path);
|
||||
std::stringstream gfx_path;
|
||||
gfx_path << app_path << metrics_path;
|
||||
setenv("ROCPROFILER_METRICS_PATH", gfx_path.str().c_str(), true);
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
testing::FLAGS_gtest_death_test_style = "threadsafe";
|
||||
hsa_init();
|
||||
int status = RUN_ALL_TESTS();
|
||||
hsa_shut_down();
|
||||
return status;
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
# Setup unit testing env
|
||||
|
||||
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)
|
||||
|
||||
enable_testing()
|
||||
find_package(GTest REQUIRED)
|
||||
|
||||
find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED)
|
||||
|
||||
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
|
||||
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
|
||||
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
|
||||
|
||||
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
|
||||
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
|
||||
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
|
||||
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
|
||||
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
|
||||
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
|
||||
|
||||
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
|
||||
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
|
||||
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
|
||||
|
||||
file(GLOB ROCPROFILER_SRC_PROFILER_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
|
||||
file(GLOB ROCPROFILER_TRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
|
||||
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
|
||||
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_CLASS_FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp)
|
||||
file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp)
|
||||
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
|
||||
file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp)
|
||||
set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES}
|
||||
${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES})
|
||||
|
||||
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
|
||||
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
|
||||
|
||||
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
|
||||
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp)
|
||||
|
||||
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
|
||||
file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp
|
||||
${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
|
||||
|
||||
# Compiling gtests
|
||||
file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp)
|
||||
|
||||
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES
|
||||
${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)
|
||||
file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp)
|
||||
add_executable(
|
||||
runUnitTests
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp
|
||||
${CORE_MEMORY_SRC_FILES}
|
||||
${CORE_SESSION_SRC_FILES}
|
||||
${CORE_FILTER_SRC_FILES}
|
||||
${CORE_DEVICE_PROFILING_SRC_FILES}
|
||||
${CORE_COUNTERS_SAMPLER_SRC_FILES}
|
||||
${CORE_HW_SRC_FILES}
|
||||
${CORE_UTILS_SRC_FILES}
|
||||
${ROCPROFILER_SPM_SRC_FILES}
|
||||
${ROCPROFILER_SRC_FILES}
|
||||
${CORE_HSA_SRC_FILES}
|
||||
${CORE_HSA_PACKETS_SRC_FILES}
|
||||
${CORE_COUNTERS_SRC_FILES}
|
||||
${CORE_HSA_QUEUES_SRC_FILES}
|
||||
${ROCPROFILER_TRACER_SRC_FILES}
|
||||
${ROCPROFILER_ROCTRACER_SRC_FILES}
|
||||
${CORE_COUNTERS_METRICS_SRC_FILES}
|
||||
${CORE_COUNTERS_MMIO_SRC_FILES}
|
||||
${CORE_COUNTERS_PARENT_SRC_FILES}
|
||||
${CORE_PC_SAMPLING_FILES}
|
||||
${GTEST_MAIN_SRC_FILE}
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
runUnitTests
|
||||
PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR}
|
||||
${PROJECT_BINARY_DIR}/rocprofiler)
|
||||
|
||||
target_compile_definitions(
|
||||
runUnitTests
|
||||
PUBLIC AMD_INTERNAL_BUILD
|
||||
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_link_libraries(
|
||||
runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64
|
||||
GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl)
|
||||
|
||||
add_dependencies(tests runUnitTests)
|
||||
install(TARGETS runUnitTests
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests
|
||||
COMPONENT tests)
|
||||
add_test(AllTests runUnitTests)
|
||||
+3
-18
@@ -30,27 +30,12 @@
|
||||
#include "core/session/session.h"
|
||||
#include "utils/helper.h"
|
||||
|
||||
|
||||
/*
|
||||
* ###############################################
|
||||
* ################TESTING HSA_INFO###############
|
||||
* ###############################################
|
||||
*/
|
||||
|
||||
TEST(WhenTestingAgentInfoGetterSetters, TestRunsSuccessfully) {
|
||||
Agent::AgentInfo agent_info = Agent::AgentInfo();
|
||||
char gpu_name[] = "gfx10";
|
||||
agent_info.setName(gpu_name);
|
||||
agent_info.setIndex(0);
|
||||
agent_info.setType(hsa_device_type_t::HSA_DEVICE_TYPE_GPU);
|
||||
|
||||
EXPECT_EQ(agent_info.getName(), gpu_name);
|
||||
EXPECT_EQ(agent_info.getIndex(), 0);
|
||||
EXPECT_EQ(agent_info.getType(), hsa_device_type_t::HSA_DEVICE_TYPE_GPU);
|
||||
|
||||
Agent::CounterHardwareInfo hw_info(0, "GRBM");
|
||||
EXPECT_TRUE(getHardwareInfo(0, "GRBM", &hw_info));
|
||||
}
|
||||
|
||||
void buffer_callback_fun(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end,
|
||||
@@ -66,7 +51,7 @@ void buffer_callback_fun(const rocprofiler_record_header_t* begin,
|
||||
|
||||
// A lot have changed in the class, since this test was written
|
||||
// Need to rewrite all the test cases again.
|
||||
TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) {
|
||||
TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) {
|
||||
Memory::GenericBuffer* buffer = new Memory::GenericBuffer(
|
||||
rocprofiler_session_id_t{0}, rocprofiler_buffer_id_t{0}, 0x8000, buffer_callback_fun);
|
||||
|
||||
@@ -285,7 +270,7 @@ void (*callback_fun)(const rocprofiler_record_header_t* begin,
|
||||
TEST(WhenTestingCounterCollectionMode, TestSucceeds) {
|
||||
rocprofiler_session_id_t session_id;
|
||||
|
||||
rocprofiler::ROCProfiler_Singleton toolobj;
|
||||
rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE);
|
||||
rocprofiler_filter_id_t filter_id =
|
||||
toolobj.GetSession(session_id)
|
||||
@@ -304,7 +289,7 @@ TEST(WhenTestingCounterCollectionMode, TestSucceeds) {
|
||||
TEST(WhenTestingTimeStampCollectionMode, TestSucceeds) {
|
||||
rocprofiler_session_id_t session_id;
|
||||
|
||||
rocprofiler::ROCProfiler_Singleton toolobj;
|
||||
rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE);
|
||||
rocprofiler_filter_id_t filter_id =
|
||||
toolobj.GetSession(session_id)
|
||||
Ссылка в новой задаче
Block a user