diff --git a/projects/rocprofiler/build.sh b/projects/rocprofiler/build.sh index d2e6dedd25..33ce9e5f32 100755 --- a/projects/rocprofiler/build.sh +++ b/projects/rocprofiler/build.sh @@ -72,7 +72,7 @@ if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi if [ -z "$RUN_TEST" ] ; then RUN_TEST=no; fi if [ -z "$ASAN" ] ; then ASAN=False; fi if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi - + ROCPROFILER_ROOT=$(cd $ROCPROFILER_ROOT && echo $PWD) @@ -96,6 +96,7 @@ cmake \ -DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \ -DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \ -DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \ + -DHIP_ROOT_DIR=${ROCM_PATH} \ $ROCPROFILER_ROOT popd diff --git a/projects/rocprofiler/src/api/rocprofiler_singleton.cpp b/projects/rocprofiler/src/api/rocprofiler_singleton.cpp index b93af87cc2..da8caa1cd7 100644 --- a/projects/rocprofiler/src/api/rocprofiler_singleton.cpp +++ b/projects/rocprofiler/src/api/rocprofiler_singleton.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -34,6 +35,7 @@ #include "src/utils/logger.h" #include "src/core/memory/generic_buffer.h" +namespace fs = std::experimental::filesystem; #define ASSERTM(exp, msg) assert(((void)msg, exp)) extern std::mutex sessions_pending_signal_lock; @@ -42,14 +44,37 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); } namespace rocprofiler { -ROCProfiler_Singleton* rocprofiler_singleton; + // Constructor of ROCProfiler // Takes the buffer size, a buffer callback function and a buffer flush // interval to allocate a buffer pool using GenericStorage Also takes the // replay mode (application replay/kernel replay/user replay) to set the replay // mode for the rocprofiler class object -ROCProfiler_Singleton::ROCProfiler_Singleton() : current_session_id_(rocprofiler_session_id_t{0}) {} +ROCProfiler_Singleton::ROCProfiler_Singleton() { + fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes"; + fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes"); + if (!fs::exists(sysfs_nodes_path)) + rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str()); + for (auto const& dirp_entry : fs::directory_iterator{dirp}) { + fs::path node_path = dirp_entry.path(); + long long node_id = std::stoll(dirp_entry.path().stem().string()); + fs::path gpu_path = node_path / "gpu_id"; + std::ifstream gpu_id_file(gpu_path.c_str()); + std::string gpu_id_str; + if (gpu_id_file.is_open()) { + gpu_id_file >> gpu_id_str; + if (!gpu_id_str.empty()) { + long long gpu_id = std::stoll(gpu_id_str); + if (gpu_id > 0) { + Agent::DeviceInfo deviceInfo(node_id, gpu_id); + // Since it is in static initializer, so its protected + agent_device_map_.emplace(deviceInfo.getGPUId(), deviceInfo); + } + } + } + } +} // Destructor of rocprofiler_singleton // deletes the buffer pool @@ -68,7 +93,11 @@ ROCProfiler_Singleton::~ROCProfiler_Singleton() { } Counter::ClearBasicCounters(); } - +ROCProfiler_Singleton& ROCProfiler_Singleton::GetInstance() { + static ROCProfiler_Singleton* rocprofiler_singleton_instance = + new ROCProfiler_Singleton; + return *rocprofiler_singleton_instance; +} bool ROCProfiler_Singleton::FindAgent(rocprofiler_agent_id_t agent_id) { return true; } size_t ROCProfiler_Singleton::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id) { @@ -210,28 +239,45 @@ const char* ROCProfiler_Singleton::GetKernelInfo(rocprofiler_kernel_info_kind_t } } +const Agent::DeviceInfo& ROCProfiler_Singleton::GetDeviceInfo(uint64_t gpu_id) { + std::lock_guard info_map_lock(agent_device_map_mutex_); + auto it = agent_device_map_.find(gpu_id); + if (it == agent_device_map_.end()) + rocprofiler::fatal("Device Info is not found for the given id:%ld", gpu_id); + return it->second; +} + // TODO(aelwazir): To be implemented bool ROCProfiler_Singleton::CheckFilterData(rocprofiler_filter_kind_t filter_kind, rocprofiler_filter_data_t filter_data) { return true; } -// End of ROCProfiler_Singleton Class +rocprofiler_timestamp_t ROCProfiler_Singleton::timestamp_ns() { -ROCProfiler_Singleton* GetROCProfilerSingleton() { return rocprofiler_singleton; } + static uint64_t sys_clock_period = 0; + struct timespec ts; + // We are not full on memory model. At worst each CPU can call it once. + // We don't care for this variable's value in global memory as long as we have + // non-zero in the CPU cache. + if (sys_clock_period == 0) { + clock_getres(CLOCK_BOOTTIME, &ts); + sys_clock_period = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec)); + } -void InitROCProfilerSingleton() { rocprofiler_singleton = new ROCProfiler_Singleton; } -void ResetROCProfilerSingleton() { - delete rocprofiler_singleton; - // TODO(aelwazir): We need to use std::optional or std::unique_ptr - // if (rocprofiler_singleton) rocprofiler_singleton.reset(); + clock_gettime(CLOCK_BOOTTIME, &ts); + uint64_t time = (uint64_t(ts.tv_sec) * 1000000000 + uint64_t(ts.tv_nsec)); + if (sys_clock_period != 1) + return rocprofiler_timestamp_t{time / sys_clock_period}; + else + return rocprofiler_timestamp_t{time}; } -rocprofiler_timestamp_t GetCurrentTimestamp() { return hsa_support::GetCurrentTimestampNS(); } - rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) { - if (hsa_support::IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS; + if (hsa_support_IterateCounters(counters_info_callback)) return ROCPROFILER_STATUS_SUCCESS; return ROCPROFILER_STATUS_ERROR; } +// End of ROCProfiler_Singleton Class + } // namespace rocprofiler diff --git a/projects/rocprofiler/src/api/rocprofiler_singleton.h b/projects/rocprofiler/src/api/rocprofiler_singleton.h index d992515b33..9271458db8 100644 --- a/projects/rocprofiler/src/api/rocprofiler_singleton.h +++ b/projects/rocprofiler/src/api/rocprofiler_singleton.h @@ -41,13 +41,15 @@ #include "src/core/session/session.h" #include "src/core/session/device_profiling.h" +#include "src/core/hardware/hsa_info.h" namespace rocprofiler { class ROCProfiler_Singleton { public: - ROCProfiler_Singleton(); - ~ROCProfiler_Singleton(); + ROCProfiler_Singleton(const ROCProfiler_Singleton&) = delete; + ROCProfiler_Singleton& operator=(const ROCProfiler_Singleton&) = delete; + static ROCProfiler_Singleton& GetInstance(); bool FindAgent(rocprofiler_agent_id_t agent_id); size_t GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id); @@ -84,7 +86,8 @@ class ROCProfiler_Singleton { rocprofiler_filter_data_t filter_data); uint64_t GetUniqueRecordId(); uint64_t GetUniqueKernelDispatchId(); - + const Agent::DeviceInfo& GetDeviceInfo(uint64_t gpu_id); + rocprofiler_timestamp_t timestamp_ns(); private: rocprofiler_session_id_t current_session_id_{0}; std::mutex session_map_lock_; @@ -92,6 +95,11 @@ class ROCProfiler_Singleton { std::atomic records_counter_{1}; std::mutex device_profiling_session_map_lock_; std::map dev_profiling_sessions_; + std::mutex agent_device_map_mutex_; + std::unordered_map agent_device_map_; + ROCProfiler_Singleton(); + ~ROCProfiler_Singleton(); + /* * XXX: Associating PC samples with a running kernel requires an identifier * that will be unique across all kernel executions. It is not enough to use @@ -110,14 +118,7 @@ class ROCProfiler_Singleton { std::atomic kernel_dispatch_counter_{1}; }; -void InitROCProfilerSingleton(); -void ResetROCProfilerSingleton(); -ROCProfiler_Singleton* GetROCProfilerSingleton(); - -rocprofiler_timestamp_t GetCurrentTimestamp(); - rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback); - } // namespace rocprofiler #endif // SRC_TOOLS_ROCPROFILER_SINGLETON_H_ diff --git a/projects/rocprofiler/src/api/rocprofilerv2.cpp b/projects/rocprofiler/src/api/rocprofilerv2.cpp index 9c473faf39..9ab1804de9 100644 --- a/projects/rocprofiler/src/api/rocprofilerv2.cpp +++ b/projects/rocprofiler/src/api/rocprofilerv2.cpp @@ -132,7 +132,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() { API_METHOD_PREFIX if (api_started.load(std::memory_order_relaxed)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED); - rocprofiler::InitROCProfilerSingleton(); + rocprofiler::ROCProfiler_Singleton::GetInstance(); api_started.exchange(true, std::memory_order_release); API_METHOD_SUFFIX } @@ -140,14 +140,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() { // Finalize the API ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() { API_INIT_CHECKER - rocprofiler::ResetROCProfilerSingleton(); api_started.exchange(false, std::memory_order_release); API_METHOD_SUFFIX } ROCPROFILER_API rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) { API_INIT_CHECKER - *timestamp = rocprofiler::GetCurrentTimestamp(); + *timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns(); if (timestamp->value <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE); API_METHOD_SUFFIX @@ -163,20 +162,21 @@ rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info_size( rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, size_t* data_size) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindAgent(agent_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND); - *data_size = rocprofiler::GetROCProfilerSingleton()->GetAgentInfoSize(kind, agent_id); - if (*data_size <= 0) - throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING); + *data_size = rocprofiler_singleton.GetAgentInfoSize(kind, agent_id); + if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING); API_METHOD_SUFFIX } ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info( rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, const char** data) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindAgent(agent_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindAgent(agent_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND); - if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetAgentInfo(kind, agent_id))) + if (!(*data = rocprofiler_singleton.GetAgentInfo(kind, agent_id))) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING); API_METHOD_SUFFIX } @@ -184,20 +184,21 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info( ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info_size( rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, size_t* data_size) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindQueue(queue_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND); - *data_size = rocprofiler::GetROCProfilerSingleton()->GetQueueInfoSize(kind, queue_id); - if (*data_size <= 0) - throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING); + *data_size = rocprofiler_singleton.GetQueueInfoSize(kind, queue_id); + if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING); API_METHOD_SUFFIX } ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info( rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, const char** data) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindQueue(queue_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindQueue(queue_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND); - if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetQueueInfo(kind, queue_id))) + if (!(*data = rocprofiler_singleton.GetQueueInfo(kind, queue_id))) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING); API_METHOD_SUFFIX } @@ -205,9 +206,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info( ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size( rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, size_t* data_size) { API_INIT_CHECKER - // if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id)) + // if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id)) // throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND); - *data_size = rocprofiler::GetROCProfilerSingleton()->GetKernelInfoSize(kind, kernel_id); + *data_size = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfoSize(kind, kernel_id); if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING); API_METHOD_SUFFIX @@ -216,9 +217,9 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size( ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info( rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, const char** data) { API_INIT_CHECKER - // if (!rocprofiler::GetROCProfilerSingleton()->FindKernel(kernel_id)) + // if (!rocprofiler::rocmtool::GetInstance().FindKernel(kernel_id)) // throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND); - if (!(*data = rocprofiler::GetROCProfilerSingleton()->GetKernelInfo(kind, kernel_id))) + if (!(*data = rocprofiler::ROCProfiler_Singleton::GetInstance().GetKernelInfo(kind, kernel_id))) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING); API_METHOD_SUFFIX } @@ -227,14 +228,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info_size( rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind, rocprofiler_counter_id_t counter_id, size_t* data_size) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetProfiler() - ->FindCounter(counter_id)) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND); - *data_size = rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetProfiler() + *data_size = rocprofiler_singleton + .GetSession(session_id) + ->GetProfiler() ->GetCounterInfoSize(kind, counter_id); if (*data_size <= 0) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING); @@ -245,13 +244,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info( rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind, rocprofiler_counter_id_t counter_id, const char** data) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetProfiler() - ->FindCounter(counter_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.GetSession(session_id)->GetProfiler()->FindCounter(counter_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND); - if (!(*data = rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) + if (!(*data = rocprofiler_singleton + .GetSession(session_id) ->GetProfiler() ->GetCounterInfo(kind, counter_id))) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING); @@ -280,14 +277,12 @@ rocprofiler_query_tracer_operation_id(rocprofiler_tracer_activity_domain_t domai ROCPROFILER_API rocprofiler_status_t rocprofiler_flush_data(rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetBuffer(buffer_id) - ->Flush()) + if (!rocprofiler_singleton.GetSession(session_id)->GetBuffer(buffer_id)->Flush()) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER); API_METHOD_SUFFIX } @@ -298,9 +293,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record( const rocprofiler_record_header_t* record, const rocprofiler_record_header_t** next, rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND); if (!Memory::GetNextRecord(record, next)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED); @@ -311,7 +307,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record( ROCPROFILER_API rocprofiler_status_t rocprofiler_create_session( rocprofiler_replay_mode_t replay_mode, rocprofiler_session_id_t* session_id) { API_INIT_CHECKER - *session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode); + *session_id = rocprofiler::ROCProfiler_Singleton::GetInstance().CreateSession(replay_mode); API_METHOD_SUFFIX } @@ -322,15 +318,16 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter( API_INIT_CHECKER // TODO(aelwazir): CheckFilterData to be implemented // int error_code = - // rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind, + // rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind, // filter_data); // if (error_code == -1) throw // rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0) // throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH); - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - *filter_id = rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) + *filter_id = rocprofiler_singleton + .GetSession(session_id) ->CreateFilter(filter_kind, filter_data, data_count, property); API_METHOD_SUFFIX } @@ -338,11 +335,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter( ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_filter(rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND); - rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyFilter(filter_id); + rocprofiler_singleton.GetSession(session_id)->DestroyFilter(filter_id); API_METHOD_SUFFIX } @@ -350,10 +348,11 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_buffer( rocprofiler_session_id_t session_id, rocprofiler_buffer_callback_t buffer_callback, size_t buffer_size, rocprofiler_buffer_id_t* buffer_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - *buffer_id = rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) + *buffer_id = rocprofiler_singleton + .GetSession(session_id) ->CreateBuffer(buffer_callback, buffer_size); API_METHOD_SUFFIX } @@ -362,12 +361,13 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties( rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id, rocprofiler_buffer_property_t* buffer_properties, uint32_t buffer_properties_count) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND); - rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) + rocprofiler_singleton + .GetSession(session_id) ->GetBuffer(buffer_id) ->SetProperties(buffer_properties, buffer_properties_count); API_METHOD_SUFFIX @@ -376,11 +376,12 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties( ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_buffer(rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND); - rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->DestroyBuffer(buffer_id); + rocprofiler_singleton.GetSession(session_id)->DestroyBuffer(buffer_id); API_METHOD_SUFFIX } @@ -388,20 +389,18 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_filter_buffer( rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id, rocprofiler_buffer_id_t buffer_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindBuffer(buffer_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindBuffer(buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) + if (!rocprofiler_singleton + .GetSession(session_id) ->CheckFilterBufferSize(filter_id, buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE); - rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetFilter(filter_id) - ->SetBufferId(buffer_id); + rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetBufferId(buffer_id); API_METHOD_SUFFIX } @@ -409,19 +408,15 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_set_api_trace_sync_callback( rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id, rocprofiler_sync_callback_t callback) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->FindFilter(filter_id)) + if (!rocprofiler_singleton.GetSession(session_id)->FindFilter(filter_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND); - if (rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetFilter(filter_id) - ->GetKind() != ROCPROFILER_API_TRACE) + if (rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->GetKind() != + ROCPROFILER_API_TRACE) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED); - rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetFilter(filter_id) - ->SetCallback(callback); + rocprofiler_singleton.GetSession(session_id)->GetFilter(filter_id)->SetCallback(callback); API_METHOD_SUFFIX } @@ -433,30 +428,31 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session( API_INIT_CHECKER // TODO(aelwazir): CheckFilterData to be implemented // int error_code = - // rocprofiler::GetROCProfilerSingleton()->CheckFilterData(filter_kind, + // rocprofiler::ROCProfiler_Singleton::GetInstance().CheckFilterData(filter_kind, // filter_data); // if (error_code == -1) throw // rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED); if (error_code == 0) // throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH); - *session_id = rocprofiler::GetROCProfilerSingleton()->CreateSession(replay_mode); + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + *session_id = rocprofiler_singleton.CreateSession(replay_mode); rocprofiler_filter_id_t filter_id = - rocprofiler::GetROCProfilerSingleton() - ->GetSession(*session_id) + rocprofiler_singleton + .GetSession(*session_id) ->CreateFilter(filter_kind, filter_data, data_count, property); - rocprofiler_buffer_id_t buffer_id = rocprofiler::GetROCProfilerSingleton() - ->GetSession(*session_id) - ->CreateBuffer(buffer_callback, buffer_size); + rocprofiler_buffer_id_t buffer_id = rocprofiler_singleton + .GetSession(*session_id) + ->CreateBuffer(buffer_callback, buffer_size); if (filter_kind == ROCPROFILER_API_TRACE) - rocprofiler::GetROCProfilerSingleton() - ->GetSession(*session_id) + rocprofiler_singleton + .GetSession(*session_id) ->GetFilter(filter_id) ->SetCallback(callback); - if (!rocprofiler::GetROCProfilerSingleton() - ->GetSession(*session_id) + if (!rocprofiler_singleton + .GetSession(*session_id) ->CheckFilterBufferSize(filter_id, buffer_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE); - rocprofiler::GetROCProfilerSingleton() - ->GetSession(*session_id) + rocprofiler_singleton + .GetSession(*session_id) ->GetFilter(filter_id) ->SetBufferId(buffer_id); API_METHOD_SUFFIX @@ -466,9 +462,10 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session( ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_session(rocprofiler_session_id_t session_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - rocprofiler::GetROCProfilerSingleton()->DestroySession(session_id); + rocprofiler_singleton.DestroySession(session_id); API_METHOD_SUFFIX } @@ -476,16 +473,17 @@ rocprofiler_destroy_session(rocprofiler_session_id_t session_id) { ROCPROFILER_API rocprofiler_status_t rocprofiler_start_session(rocprofiler_session_id_t session_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasFilter()) + if (!rocprofiler_singleton.GetSession(session_id)->HasFilter()) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER); - if (!rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->HasBuffer()) + if (!rocprofiler_singleton.GetSession(session_id)->HasBuffer()) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER); - if (rocprofiler::GetROCProfilerSingleton()->HasActiveSession()) + if (rocprofiler_singleton.HasActiveSession()) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION); - rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Start(); - rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(session_id); + rocprofiler_singleton.GetSession(session_id)->Start(); + rocprofiler_singleton.SetCurrentActiveSession(session_id); API_METHOD_SUFFIX } @@ -493,12 +491,13 @@ rocprofiler_start_session(rocprofiler_session_id_t session_id) { ROCPROFILER_API rocprofiler_status_t rocprofiler_terminate_session(rocprofiler_session_id_t session_id) { API_INIT_CHECKER - if (!rocprofiler::GetROCProfilerSingleton()->FindSession(session_id)) + auto& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (!rocprofiler_singleton.FindSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND); - if (!rocprofiler::GetROCProfilerSingleton()->IsActiveSession(session_id)) + if (!rocprofiler_singleton.IsActiveSession(session_id)) throw rocprofiler::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE); - rocprofiler::GetROCProfilerSingleton()->GetSession(session_id)->Terminate(); - rocprofiler::GetROCProfilerSingleton()->SetCurrentActiveSession(rocprofiler_session_id_t{0}); + rocprofiler_singleton.GetSession(session_id)->Terminate(); + rocprofiler_singleton.SetCurrentActiveSession(rocprofiler_session_id_t{0}); API_METHOD_SUFFIX } @@ -507,8 +506,8 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create int cpu_index, int gpu_index) { API_METHOD_PREFIX std::vector counters(counter_names, counter_names + num_counters); - *session_id = rocprofiler::GetROCProfilerSingleton()->CreateDeviceProfilingSession( - counters, cpu_index, gpu_index); + *session_id = + rocprofiler::ROCProfiler_Singleton::GetInstance().CreateDeviceProfilingSession(counters, cpu_index, gpu_index); API_METHOD_SUFFIX } @@ -516,7 +515,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) { API_METHOD_PREFIX - rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StartSession(); + rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StartSession(); API_METHOD_SUFFIX } @@ -524,7 +523,7 @@ rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll( rocprofiler_session_id_t session_id, rocprofiler_device_profile_metric_t* data) { API_METHOD_PREFIX - rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->PollMetrics(data); + rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->PollMetrics(data); API_METHOD_SUFFIX } @@ -532,7 +531,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll( ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) { API_METHOD_PREFIX - rocprofiler::GetROCProfilerSingleton()->GetDeviceProfilingSession(session_id)->StopSession(); + rocprofiler::ROCProfiler_Singleton::GetInstance().GetDeviceProfilingSession(session_id)->StopSession(); API_METHOD_SUFFIX } @@ -540,7 +539,7 @@ rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) { ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) { API_METHOD_PREFIX - rocprofiler::GetROCProfilerSingleton()->DestroyDeviceProfilingSession(session_id); + rocprofiler::ROCProfiler_Singleton::GetInstance().DestroyDeviceProfilingSession(session_id); API_METHOD_SUFFIX } @@ -563,7 +562,7 @@ rocprofiler_codeobj_capture_create( uint64_t userdata ) { API_METHOD_PREFIX - id->handle = rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId(); + id->handle = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId(); codeobj_record::make_capture(*id, mode, userdata); API_METHOD_SUFFIX } @@ -618,7 +617,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { if (started) rocprofiler::fatal("HSA Tool started already!"); started = true; - rocprofiler::hsa_support::Initialize(table); + rocprofiler::HSASupport_Singleton::GetInstance().HSAInitialize(table); return true; } @@ -627,7 +626,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, */ ROCPROFILER_EXPORT void OnUnload() { if (!started) rocprofiler::fatal("HSA Tool hasn't started yet!"); - rocprofiler::hsa_support::Finalize(); + rocprofiler::HSASupport_Singleton::GetInstance().HSAFinalize(); started = false; } diff --git a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp index 21492725ab..d8aebcfada 100644 --- a/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp +++ b/projects/rocprofiler/src/core/counters/metrics/eval_metrics.cpp @@ -1,6 +1,6 @@ #include "eval_metrics.h" #include "src/utils/helper.h" -#include "src/core/hsa/hsa_common.h" +#include "src/core/hsa/hsa_support.h" #include #include @@ -93,16 +93,20 @@ bool metrics::ExtractMetricEvents( results_list holds the result objects for each event (which means, basic counters only) */ try { - uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount(); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + uint32_t xcc_count = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount(); + + for (size_t i = 0; i < metric_names.size(); i++) { counters_vec_t counters_vec; // TODO: saurabh // const Metric* metric = metrics_dict->GetMetricByName(metric_names[i]); const Metric* metric = metrics_dict->Get(metric_names[i]); if (metric == nullptr) { - Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle); - fatal("input metric '%s' not supported on this hardware: %s ", metric_names[i].c_str(), - agentInfo.getName().data()); + HSAAgentInfo& agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle); + fatal("input metric'%s' not supported on this hardware: %s ", metric_names[i].c_str(), + agentInfo.GetDeviceInfo().getName().data()); + } // adding result object for derived metric @@ -176,7 +180,7 @@ bool metrics::ExtractMetricEvents( bool metrics::GetCounterData(hsa_ven_amd_aqlprofile_profile_t* profile, hsa_agent_t gpu_agent, std::vector& results_list) { - uint32_t xcc_count = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle).getXccCount(); + uint32_t xcc_count = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle).GetDeviceInfo().getXccCount(); uint32_t single_xcc_buff_size = profile->output_buffer.size / (sizeof(uint64_t) * xcc_count); callback_data_t callback_data{&results_list, 0, single_xcc_buff_size}; hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(profile, pmcCallback, &callback_data); diff --git a/projects/rocprofiler/src/core/counters/metrics/metrics.h b/projects/rocprofiler/src/core/counters/metrics/metrics.h index afd2326c53..d66f71455e 100644 --- a/projects/rocprofiler/src/core/counters/metrics/metrics.h +++ b/projects/rocprofiler/src/core/counters/metrics/metrics.h @@ -44,6 +44,8 @@ THE SOFTWARE. #include #include #include "src/core/hardware/hsa_info.h" +#include "src/core/hsa/hsa_support.h" + namespace fs = std::experimental::filesystem; namespace rocprofiler { @@ -121,10 +123,10 @@ class MetricsDict { const cache_t* const cache_; }; - static MetricsDict* Create(const Agent::AgentInfo* agent_info) { + static MetricsDict* Create(const rocprofiler::HSAAgentInfo* agent_info) { std::lock_guard lck(mutex_); if (map_ == NULL) map_ = new map_t; - std::string name = agent_info->getGfxip(); + std::string name = agent_info->GetDeviceInfo().getGfxip(); auto ret = map_->insert({name, NULL}); if (ret.second) ret.first->second = new MetricsDict(agent_info); return ret.first->second; @@ -195,7 +197,7 @@ class MetricsDict { return (xml_ != NULL) ? xml_->GetNodes("top." + scope + ".metric") : xml::Xml::nodes_t(); } - MetricsDict(const Agent::AgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) { + MetricsDict(const rocprofiler::HSAAgentInfo* agent_info) : xml_(NULL), agent_info_(agent_info) { std::string xml_name = []() { if (const char* path = getenv("ROCPROFILER_METRICS_PATH"); path != nullptr) return path; return ""; @@ -208,14 +210,14 @@ class MetricsDict { } xml_ = xml::Xml::Create(xml_name); if (xml_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metrics .xml open error '" << xml_name << "'"); - xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->getMaxQueueSize()); - xml_->AddConst("top.const.metric", "CU_NUM", agent_info->getCUCount()); + xml_->AddConst("top.const.metric", "MAX_WAVE_SIZE", agent_info->GetDeviceInfo().getMaxQueueSize()); + xml_->AddConst("top.const.metric", "CU_NUM", agent_info->GetDeviceInfo().getCUCount()); xml_->AddConst("top.const.metric", "SIMD_NUM", - agent_info->getSimdCountPerCU() * agent_info->getCUCount()); - xml_->AddConst("top.const.metric", "SE_NUM", agent_info->getShaderEngineCount()); + agent_info->GetDeviceInfo().getSimdCountPerCU() * agent_info->GetDeviceInfo().getCUCount()); + xml_->AddConst("top.const.metric", "SE_NUM", agent_info->GetDeviceInfo().getShaderEngineCount()); xml_->AddConst("top.const.metric", "LDS_BANKS", 32); ImportMetrics(agent_info, "const"); - agent_name_ = agent_info->getName(); + agent_name_ = agent_info->GetDeviceInfo().getName(); if (agent_name_.find(':') != std::string::npos) // Remove compiler flags from the agent_name agent_name_ = agent_name_.substr(0, agent_name_.find(':')); @@ -233,7 +235,7 @@ class MetricsDict { if (supported_agent_names.find(agent_name_) != supported_agent_names.end()) { ImportMetrics(agent_info, agent_name_); } else { - agent_name_ = agent_info->getGfxip(); + agent_name_ = agent_info->GetDeviceInfo().getGfxip(); ImportMetrics(agent_info, agent_name_); } ImportMetrics(agent_info, "global"); @@ -244,7 +246,7 @@ class MetricsDict { for (auto& entry : cache_) delete entry.second; } - static hsa_ven_amd_aqlprofile_id_query_t Translate(const Agent::AgentInfo* agent_info, + static hsa_ven_amd_aqlprofile_id_query_t Translate(const rocprofiler::HSAAgentInfo* agent_info, const std::string& block_name) { hsa_ven_amd_aqlprofile_profile_t profile{}; profile.agent = hsa_agent_t{agent_info->getHandle()}; @@ -256,7 +258,7 @@ class MetricsDict { return query; } - void ImportMetrics(const Agent::AgentInfo* agent_info, const std::string& scope) { + void ImportMetrics(const rocprofiler::HSAAgentInfo* agent_info, const std::string& scope) { auto arr = xml_->GetNodes("top." + scope + ".metric"); xml::Xml::node_list_t metrics_list(arr.begin(), arr.end()); uint32_t metrics_number = metrics_list.size(); @@ -380,7 +382,7 @@ class MetricsDict { } xml::Xml* xml_; - const Agent::AgentInfo* agent_info_; + const rocprofiler::HSAAgentInfo* agent_info_; std::string agent_name_; cache_t cache_; diff --git a/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.cpp b/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.cpp index 4f8d6e9aa3..c5f28e06f2 100644 --- a/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.cpp +++ b/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.cpp @@ -1,6 +1,7 @@ #include "df_counters_mi200.h" #include "df_perfmon_registers_mi200.h" #include "mmio.h" +#include "src/core/hsa/hsa_support.h" namespace rocprofiler { @@ -15,7 +16,7 @@ namespace rocprofiler { /* get ficaa value for accessing CakeDlwmActiveTransferCount */ #define AMDGPU_PMU_SET_FICAA(o) ((o << 16) | 0x1AF5) -DFPerfMonMI200::DFPerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) { +DFPerfMonMI200::DFPerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) { mmio_ = dynamic_cast(mmio::MMIOManager::CreateMMIO(mmio::DF_PERFMON, info)); } diff --git a/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.h b/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.h index c0b82e6fdc..2241192a2d 100644 --- a/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.h +++ b/projects/rocprofiler/src/core/counters/mmio/df_counters_mi200.h @@ -14,7 +14,7 @@ namespace rocprofiler { class DFPerfMonMI200 : public PerfMon { public: - DFPerfMonMI200(const Agent::AgentInfo& info); + DFPerfMonMI200(const HSAAgentInfo& info); ~DFPerfMonMI200(); void Start() override; void Stop(){}; diff --git a/projects/rocprofiler/src/core/counters/mmio/mmio.cpp b/projects/rocprofiler/src/core/counters/mmio/mmio.cpp index e48b9bc1dd..c19db4f587 100644 --- a/projects/rocprofiler/src/core/counters/mmio/mmio.cpp +++ b/projects/rocprofiler/src/core/counters/mmio/mmio.cpp @@ -51,10 +51,10 @@ void PrintRegisterData(uint32_t& index_value, uint32_t& data_value, const char* #endif } -MMIO::MMIO(const Agent::AgentInfo& info) +MMIO::MMIO(const HSAAgentInfo& info) : agent_info_(&info), pci_memory_(nullptr), type_(DEFAULT_MMAP) { - const auto pci_domain = agent_info_->getPCIDomain(); - const auto pci_location_id = agent_info_->getPCILocationID(); + const auto pci_domain = agent_info_->GetDeviceInfo().getPCIDomain(); + const auto pci_location_id = agent_info_->GetDeviceInfo().getPCILocationID(); pci_device_ = pci_device_find_by_slot(pci_domain, pci_location_id >> 8, pci_location_id & 0xFF, 0); @@ -123,7 +123,7 @@ bool MMIO::RegisterReadAPI(uint32_t reg_offset, uint32_t& value) { } -MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) { +MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const HSAAgentInfo& info) { MMIO* mmio = nullptr; switch (type) { case PCIE_PERFMON: { @@ -152,7 +152,7 @@ MMIO* MMIOManager::CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info) { return mmio; } -MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info) { +MMIO* MMIOManager::GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info) { MMIO* mmio = nullptr; auto it = mmio_instances_.find(info.getHandle()); if (it != mmio_instances_.end()) { diff --git a/projects/rocprofiler/src/core/counters/mmio/mmio.h b/projects/rocprofiler/src/core/counters/mmio/mmio.h index 1a9ce11b45..da4dc08cce 100644 --- a/projects/rocprofiler/src/core/counters/mmio/mmio.h +++ b/projects/rocprofiler/src/core/counters/mmio/mmio.h @@ -23,6 +23,7 @@ #include #include "src/core/hardware/hsa_info.h" +#include "src/core/hsa/hsa_support.h" #include #include @@ -67,17 +68,17 @@ class MMIO { virtual ~MMIO(); friend class MMIOManager; - const Agent::AgentInfo& GetAgentInfo() { return *agent_info_; } + const HSAAgentInfo& GetAgentInfo() { return *agent_info_; } mmap_type_t Type() { return type_; } protected: - MMIO(const Agent::AgentInfo& info); + MMIO(const HSAAgentInfo& info); // default constructor; helpful for derived classes // which want to setup mmio construction differently MMIO() { type_ = DEFAULT_MMAP; }; - const Agent::AgentInfo* agent_info_; + const HSAAgentInfo* agent_info_; struct pci_device* pci_device_; size_t pci_memory_size_; uint32_t* pci_memory_; @@ -94,7 +95,7 @@ class PciePerfmonMMIO : public MMIO { friend class MMIOManager; protected: - PciePerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; }; + PciePerfmonMMIO(const HSAAgentInfo& info) : MMIO(info) { type_ = PCIE_PERFMON; }; }; // DFPerfmonMMIO has same mmio setup approach as @@ -104,7 +105,7 @@ class DFPerfmonMMIO : public MMIO { friend class MMIOManager; protected: - DFPerfmonMMIO(const Agent::AgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; }; + DFPerfmonMMIO( const HSAAgentInfo& info) : MMIO(info) { type_ = DF_PERFMON; }; }; /* Class to manage mmio for UMC/DF/PCIe etc. @@ -114,8 +115,8 @@ class DFPerfmonMMIO : public MMIO { */ class MMIOManager { public: - static MMIO* CreateMMIO(mmap_type_t type, const Agent::AgentInfo& info); - static MMIO* GetMMIOInstance(mmap_type_t type, const Agent::AgentInfo& info); + static MMIO* CreateMMIO(mmap_type_t type, const HSAAgentInfo& info); + static MMIO* GetMMIOInstance(mmap_type_t type, const HSAAgentInfo& info); static void DestroyMMIOInstance(MMIO* instance); private: diff --git a/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.cpp b/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.cpp index e7573ae7e1..3c7d4933c6 100644 --- a/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.cpp +++ b/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.cpp @@ -4,7 +4,7 @@ namespace rocprofiler { -PciePerfMonMI200::PciePerfMonMI200(const Agent::AgentInfo& info) : PerfMon(), mmio_(nullptr) { +PciePerfMonMI200::PciePerfMonMI200(const HSAAgentInfo& info) : PerfMon(), mmio_(nullptr) { mmio_ = dynamic_cast(mmio::MMIOManager::CreateMMIO(mmio::PCIE_PERFMON, info)); } diff --git a/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.h b/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.h index 6a84d318d0..14d629343b 100644 --- a/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.h +++ b/projects/rocprofiler/src/core/counters/mmio/pcie_counters_mi200.h @@ -13,7 +13,7 @@ namespace rocprofiler { class PciePerfMonMI200 : public PerfMon { public: - PciePerfMonMI200(const Agent::AgentInfo& info); + PciePerfMonMI200(const HSAAgentInfo& info); ~PciePerfMonMI200(); void SetCounterNames(std::vector& counter_names) override; void Start() override; diff --git a/projects/rocprofiler/src/core/hardware/hsa_info.cpp b/projects/rocprofiler/src/core/hardware/hsa_info.cpp index 228a899930..57e4e112fe 100644 --- a/projects/rocprofiler/src/core/hardware/hsa_info.cpp +++ b/projects/rocprofiler/src/core/hardware/hsa_info.cpp @@ -19,9 +19,13 @@ THE SOFTWARE. */ #include "hsa_info.h" +#include +#include #include "src/utils/helper.h" +namespace fs = std::experimental::filesystem; + #define CHECK_STATUS(msg, status) \ do { \ if ((status) != HSA_STATUS_SUCCESS) { \ @@ -35,86 +39,120 @@ namespace Agent { // AgentInfo Class -AgentInfo::AgentInfo() {} -AgentInfo::AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table) : handle_(agent.handle) { - if (table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &type_) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info failed"); +char convert(uint32_t version) { + uint32_t diff = version - 10; + if (static_cast('a' + diff) >= 'a' && static_cast('a' + diff) <= 'z') + return static_cast('a' + diff); + rocprofiler::fatal("Incorrect gpu version"); +} - table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NAME, name_); +DeviceInfo::DeviceInfo(uint32_t topology_id, uint32_t gpu_id) { + fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes/"; + fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes"); + if (!fs::exists(sysfs_nodes_path)) + rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str()); + // Check the type of the device using gpu id + if (gpu_id == 0) assert("DeviceInfo does not support CPU"); + numa_node_ = topology_id; + fs::path node_path = sysfs_nodes_path / std::to_string(topology_id); + if (!fs::exists(node_path)) rocprofiler::fatal("Could not opendir `%s'", node_path.c_str()); + xcc_num_ = 1; + gpu_id_ = gpu_id; + fs::path properties_path = node_path / "properties"; + std::ifstream props_ifs(properties_path); + uint32_t cu_per_simd_array = 0, array_count = 0; + uint32_t max_waves_per_simd = 0, gfx_target_version = 0; + std::string prop_name, minor_version_str, stepping_str; + uint64_t prop_value; + uint32_t major_version = 0, minor_version = 0, stepping = 0; + std::stringstream hex_minor_version; + max_wave_size_ = 0; + simds_per_cu_ = 0; + shader_arrays_per_se_ = 0; + se_num_ = 0; + waves_per_cu_ = 0; + cu_num_ = 0; + compute_units_per_sh_ = 0; + if (!props_ifs.is_open()) + rocprofiler::fatal("Could not open %s/properties", properties_path.c_str()); + while (props_ifs >> prop_name >> prop_value) { + if (prop_name == "wave_front_size") { + max_wave_size_ = static_cast(prop_value); + if (max_wave_size_ <= 0) rocprofiler::fatal("Invalid max_wave_size_ in the topology file"); + } else if (prop_name == "cu_per_simd_array") { + cu_per_simd_array = static_cast(prop_value); + if (cu_per_simd_array <= 0) + rocprofiler::fatal("Invalid cu_per_simd_array in the topology file"); + } else if (prop_name == "array_count") { + array_count = static_cast(prop_value); + if (array_count <= 0) rocprofiler::fatal("Invalid array_count in the topology file"); + } else if (prop_name == "simd_per_cu") { + simds_per_cu_ = static_cast(prop_value); + if (simds_per_cu_ <= 0) rocprofiler::fatal("Invalid simd_per_cu in the topology file"); + } else if (prop_name == "location_id") + pci_location_id_ = static_cast(prop_value); + else if (prop_name == "domain") + pci_domain_ = static_cast(prop_value); + else if (prop_name == "simd_arrays_per_engine") { + shader_arrays_per_se_ = static_cast(prop_value); + if (shader_arrays_per_se_ <= 0) + rocprofiler::fatal("Invalid simd_arrays_per_engine in the topology file"); + } else if (prop_name == "max_waves_per_simd") { + max_waves_per_simd = static_cast(prop_value); + if (max_waves_per_simd <= 0) + rocprofiler::fatal("Invalid max_waves_per_simd in the topology file"); + } else if (prop_name == "gfx_target_version") + gfx_target_version = static_cast(prop_value); + + else if (prop_name == "unique_id") + unique_gpu_id_ = static_cast(prop_value); + else if (prop_name == "num_xcc") + xcc_num_ = static_cast(prop_value); + } + + se_num_ = array_count / shader_arrays_per_se_; + waves_per_cu_ = max_waves_per_simd * simds_per_cu_; + cu_num_ = cu_per_simd_array * array_count; + major_version = (gfx_target_version / 100) / 100; + std::string major_version_str = std::to_string(major_version); + minor_version = (gfx_target_version / 100) % 100; + if (minor_version > 9) + minor_version_str = std::string(1, convert(minor_version)); + else + minor_version_str = std::to_string(minor_version); + stepping = (gfx_target_version % 100); + if (stepping > 9) + stepping_str = std::string(1, convert(stepping)); + else + stepping_str = std::to_string(stepping); + std::string gpu_name = "gfx" + major_version_str + minor_version_str + stepping_str; + strcpy(name_, gpu_name.c_str()); + compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_); + wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_; const int gfxip_label_len = std::min(strlen(name_) - 2, sizeof(gfxip_) - 1); memcpy(gfxip_, name_, gfxip_label_len); gfxip_[gfxip_label_len] = '\0'; - - if (type_ != HSA_DEVICE_TYPE_GPU) { - return; - } - - table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &max_wave_size_); - table->hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size_); - - table->hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &cu_num_); - - table->hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &simds_per_cu_); - - table->hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &se_num_); - - if (table->hsa_agent_get_info_fn(agent, - (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE, - &shader_arrays_per_se_) != HSA_STATUS_SUCCESS || - table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, - &waves_per_cu_) != HSA_STATUS_SUCCESS) { - rocprofiler::fatal("hsa_agent_get_info for gfxip hardware configuration failed"); - } - - compute_units_per_sh_ = cu_num_ / (se_num_ * shader_arrays_per_se_); - wave_slots_per_simd_ = waves_per_cu_ / simds_per_cu_; - - if (table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_DOMAIN, - &pci_domain_) != HSA_STATUS_SUCCESS || - table->hsa_agent_get_info_fn(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, - &pci_location_id_) != HSA_STATUS_SUCCESS) { - rocprofiler::fatal("hsa_agent_get_info for PCI info failed"); - } - - // TODO(saurabh, giovanni): Remove this in 5.7 - if (table->hsa_agent_get_info_fn(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_XCC), - &xcc_num_) != HSA_STATUS_SUCCESS) { - xcc_num_ = 1; - } } -uint64_t AgentInfo::getIndex() const { return index_; } -hsa_device_type_t AgentInfo::getType() const { return type_; } -uint64_t AgentInfo::getHandle() const { return handle_; } -const std::string_view AgentInfo::getName() const { return name_; } -std::string AgentInfo::getGfxip() const { return std::string(gfxip_); } -uint32_t AgentInfo::getMaxWaveSize() const { return max_wave_size_; } -uint32_t AgentInfo::getMaxQueueSize() const { return max_queue_size_; } -uint32_t AgentInfo::getCUCount() const { return cu_num_; } -uint32_t AgentInfo::getSimdCountPerCU() const { return simds_per_cu_; } -uint32_t AgentInfo::getShaderEngineCount() const { return se_num_; } -uint32_t AgentInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; } -uint32_t AgentInfo::getMaxWavesPerCU() const { return waves_per_cu_; } -uint32_t AgentInfo::getCUCountPerSH() const { return compute_units_per_sh_; } -uint32_t AgentInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; } -uint32_t AgentInfo::getPCIDomain() const { return pci_domain_; } -uint32_t AgentInfo::getPCILocationID() const { return pci_location_id_; } -uint32_t AgentInfo::getXccCount() const { return xcc_num_; } -void AgentInfo::setIndex(uint64_t index) { index_ = index; } -void AgentInfo::setType(hsa_device_type_t type) { type_ = type; } -void AgentInfo::setHandle(uint64_t handle) { handle_ = handle; } -void AgentInfo::setName(const std::string& name) { strcpy(name_, name.c_str()); } - -void AgentInfo::setNumaNode(uint32_t numa_node) { numa_node_ = numa_node; } -uint32_t AgentInfo::getNumaNode() { return numa_node_; } - -void AgentInfo::setNearCpuAgent(hsa_agent_t near_cpu_agent) { near_cpu_agent_ = near_cpu_agent; } -hsa_agent_t AgentInfo::getNearCpuAgent() { return near_cpu_agent_; } +std::string_view DeviceInfo::getName() const { return name_; } +std::string DeviceInfo::getGfxip() const { return std::string(gfxip_); } +uint32_t DeviceInfo::getMaxWaveSize() const { return max_wave_size_; } +uint32_t DeviceInfo::getMaxQueueSize() const { return max_queue_size_; } +uint32_t DeviceInfo::getCUCount() const { return cu_num_; } +uint32_t DeviceInfo::getSimdCountPerCU() const { return simds_per_cu_; } +uint32_t DeviceInfo::getShaderEngineCount() const { return se_num_; } +uint32_t DeviceInfo::getShaderArraysPerSE() const { return shader_arrays_per_se_; } +uint32_t DeviceInfo::getMaxWavesPerCU() const { return waves_per_cu_; } +uint32_t DeviceInfo::getCUCountPerSH() const { return compute_units_per_sh_; } +uint32_t DeviceInfo::getWaveSlotsPerSimd() const { return wave_slots_per_simd_; } +uint32_t DeviceInfo::getPCIDomain() const { return pci_domain_; } +uint32_t DeviceInfo::getPCILocationID() const { return pci_location_id_; } +uint32_t DeviceInfo::getXccCount() const { return xcc_num_; } +uint64_t DeviceInfo::getUniqueGPUId() const { return unique_gpu_id_; } +uint32_t DeviceInfo::getNumaNode() const { return numa_node_; } +uint64_t DeviceInfo::getGPUId() const { return gpu_id_; } // CounterHardwareInfo Class diff --git a/projects/rocprofiler/src/core/hardware/hsa_info.h b/projects/rocprofiler/src/core/hardware/hsa_info.h index f553540ec5..506f13821e 100644 --- a/projects/rocprofiler/src/core/hardware/hsa_info.h +++ b/projects/rocprofiler/src/core/hardware/hsa_info.h @@ -36,18 +36,15 @@ namespace Agent { static const uint32_t LDS_BLOCK_SIZE = 128 * 4; -// XXX TODO: This should be merged into rocprofiler::hsa_support::AgentInfo and -// this file should be removed entirely, as it's completely redundant -class AgentInfo { + +//DeviceInfo supports only GPU +class DeviceInfo { public: - AgentInfo(); - AgentInfo(const hsa_agent_t agent, ::CoreApiTable* table); - - uint64_t getIndex() const; - hsa_device_type_t getType() const; - uint64_t getHandle() const; - const std::string_view getName() const; + DeviceInfo() = default; + DeviceInfo(uint32_t topology_id, uint32_t gpu_id); + uint64_t getGPUId() const; + std::string_view getName() const; std::string getGfxip() const; uint32_t getMaxWaveSize() const; uint32_t getMaxQueueSize() const; @@ -61,26 +58,11 @@ class AgentInfo { uint32_t getPCIDomain() const; uint32_t getPCILocationID() const; uint32_t getXccCount() const; - - void setIndex(uint64_t index); - void setType(hsa_device_type_t type); - void setHandle(uint64_t handle); - void setName(const std::string& name); - - void setNumaNode(uint32_t numa_node); - uint32_t getNumaNode(); - - void setNearCpuAgent(hsa_agent_t near_cpu_agent); - hsa_agent_t getNearCpuAgent(); - - hsa_amd_memory_pool_t cpu_pool; - hsa_amd_memory_pool_t kernarg_pool; - hsa_amd_memory_pool_t gpu_pool; + uint64_t getUniqueGPUId() const; + uint32_t getNumaNode() const; private: - uint64_t index_; - hsa_device_type_t type_; // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 - uint64_t handle_; + char name_[64]; char gfxip_[64]; uint32_t max_wave_size_; @@ -95,12 +77,11 @@ class AgentInfo { uint32_t wave_slots_per_simd_; // Number of XCCs on the GPU uint32_t xcc_num_; - uint32_t pci_domain_; uint32_t pci_location_id_; - + uint64_t unique_gpu_id_; uint32_t numa_node_; - hsa_agent_t near_cpu_agent_; + uint32_t gpu_id_; }; // XXX TODO: This should be moved somewhere else so this file can be deleted diff --git a/projects/rocprofiler/src/core/hsa/hsa_common.cpp b/projects/rocprofiler/src/core/hsa/hsa_common.cpp deleted file mode 100644 index 0c55d6b2d5..0000000000 --- a/projects/rocprofiler/src/core/hsa/hsa_common.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include "hsa_common.h" - -#include "src/utils/exception.h" - -namespace rocprofiler { - -namespace hsa_support { - -std::mutex agents_map_lock; -std::map agent_info_map; -Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle) { - std::lock_guard lock(agents_map_lock); - if (agent_info_map.find(handle) != agent_info_map.end()) { - return agent_info_map.at(handle); - } else { - std::cerr << std::string("Error: Can't find Agent with handle(") << std::to_string(handle) - << ") in this system" << std::endl; - abort(); - } -} - -std::vector cpu_agents_list; - -void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info) { - std::lock_guard lock(agents_map_lock); - agent_info_map.emplace(handle, agent_info); - if (agent_info.getType() == HSA_DEVICE_TYPE_GPU) { - cpu_agents_list.emplace_back(hsa_agent_t{handle}); - } -} - -std::vector& GetCPUAgentList() { return cpu_agents_list; } - -hsa_agent_t GetAgentByIndex(uint64_t agent_index) { - std::lock_guard lock(agents_map_lock); - for (auto& agent_info : agent_info_map) { - if (agent_info.second.getIndex() == agent_index) { - return hsa_agent_t{agent_info.second.getHandle()}; - } - } - std::cerr << std::string("Error: Can't find Agent with Index(") << std::to_string(agent_index) - << ") in this system" << std::endl; - abort(); -} - -CoreApiTable saved_core_api{}; -CoreApiTable& GetCoreApiTable() { return saved_core_api; } -void SetCoreApiTable(const CoreApiTable& table) { saved_core_api = table; } - -AmdExtTable saved_amd_ext_api{}; -AmdExtTable GetAmdExtTable() { return saved_amd_ext_api; } -void SetAmdExtTable(AmdExtTable* table) { saved_amd_ext_api = *table; } - -hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{}; -hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi() { return hsa_loader_api; } -void SetHSALoaderApi() { - hsa_status_t status = saved_core_api.hsa_system_get_major_extension_table_fn( - HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api); - - if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed"); -} - -void ResetMaps() { - if (hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(false); - status != HSA_STATUS_SUCCESS) - assert(!"hsa_amd_profiling_async_copy_enable failed"); - memset(&saved_core_api, '\0', sizeof(saved_core_api)); - memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api)); - memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api)); -} - -rocprofiler_timestamp_t GetCurrentTimestampNS() { - // If the HSA intercept is installed, then use the "original" - // 'hsa_system_get_info' function to avoid reporting calls for internal use - // of the HSA API by the tracer. - auto hsa_system_get_info_fn = saved_core_api.hsa_system_get_info_fn; - - // If the HSA intercept is not installed, use the default - // 'hsa_system_get_info'. - if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info; - - uint64_t sysclock; - if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); - status == HSA_STATUS_ERROR_NOT_INITIALIZED) - return rocprofiler_timestamp_t{0}; - else if (status != HSA_STATUS_SUCCESS) - assert(!"hsa_system_get_info failed"); - - static uint64_t sysclock_period = [&]() { - uint64_t sysclock_hz = 0; - if (hsa_status_t status = - hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); - status != HSA_STATUS_SUCCESS) - assert(!"hsa_system_get_info failed"); - - return (uint64_t)1000000000 / sysclock_hz; - }(); - - return rocprofiler_timestamp_t{sysclock * sysclock_period}; -} - -} // namespace hsa_support -} // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/hsa/hsa_support.cpp b/projects/rocprofiler/src/core/hsa/hsa_support.cpp index 828d9c7c33..62c20abd0d 100644 --- a/projects/rocprofiler/src/core/hsa/hsa_support.cpp +++ b/projects/rocprofiler/src/core/hsa/hsa_support.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ #include #include #include -#include + #include #include #include @@ -42,29 +43,28 @@ #include #include "core/hardware/hsa_info.h" -#include "core/hsa/hsa_common.h" #include "src/core/session/tracer/src/correlation_id.h" #include "src/core/session/tracer/src/exception.h" #include "src/core/session/tracer/src/roctracer.h" #include "src/utils/helper.h" - #include "src/core/hsa/queues/queue.h" #include "src/api/rocprofiler_singleton.h" #include "src/core/isa_capture/code_object_track.hpp" -#include -namespace fs = std::experimental::filesystem; + namespace { hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_agent_t agent, hsa_executable_symbol_t symbol, void* args) { hsa_symbol_kind_t type; - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsasupport_singleton.GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type); if (type == HSA_SYMBOL_KIND_KERNEL) { uint32_t name_length; - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( + hsasupport_singleton.GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length); // TODO(aelwazir): to be removed if the HSA fixed the issue of corrupted // names overflowing the length given @@ -72,15 +72,15 @@ hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_ if (!(*static_cast(args))) { char name[name_length + 1]; uint64_t kernel_object; - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( + hsasupport_singleton.GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( + hsasupport_singleton.GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); std::string kernel_name = std::string(name).substr(0, name_length); rocprofiler::AddKernelName(kernel_object, kernel_name); } else { uint64_t kernel_object; - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( + hsasupport_singleton.GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel_object); rocprofiler::RemoveKernelName(kernel_object); } @@ -112,16 +112,6 @@ namespace roctracer::hsa_support { namespace { -CoreApiTable saved_core_api{}; -AmdExtTable saved_amd_ext_api{}; -hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{}; - -struct AgentInfo { - uint32_t id; - hsa_device_type_t type; -}; -std::unordered_map agent_info_map; - class Tracker { public: enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 }; @@ -168,28 +158,34 @@ class Tracker { entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->valid.store(ENTRY_INIT, std::memory_order_release); - + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); // Creating a proxy signal - status = rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, - &(entry->signal)); + status = + hsasupport_singleton.GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &(entry->signal)); if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_signal_create failed"); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_signal_async_handler_fn( entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_amd_signal_async_handler failed"); } // Delete tracker entry inline static void Disable(entry_t* entry) { - rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(entry->signal); + rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_destroy_fn( + entry->signal); entry->valid.store(ENTRY_INV, std::memory_order_release); } private: // Entry completion inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); static roctracer_timestamp_t sysclock_period = []() { uint64_t sysclock_hz = 0; - hsa_status_t status = rocprofiler::hsa_support::GetCoreApiTable().hsa_system_get_info_fn( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = hsasupport_singleton.GetCoreApiTable().hsa_system_get_info_fn( HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_system_get_info failed"); return (uint64_t)1000000000 / sysclock_hz; @@ -198,7 +194,7 @@ class Tracker { if (entry->type == COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_get_async_copy_time_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_get_async_copy_time_fn( entry->signal, &async_copy_time); if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_amd_profiling_get_async_copy_time failed"); @@ -225,11 +221,11 @@ class Tracker { orig_signal_ptr->end_ts = prof_signal_ptr->end_ts; [[maybe_unused]] const hsa_signal_value_t new_value = - rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(orig) - 1; + hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(orig) - 1; assert(signal_value == new_value && "Tracker::Complete bad signal value"); - rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_store_screlease_fn(orig, signal_value); + hsasupport_singleton.GetCoreApiTable().hsa_signal_store_screlease_fn(orig, signal_value); } - rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(signal); + hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(signal); delete entry; } @@ -246,17 +242,19 @@ class Tracker { }; hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_allocate_fn(region, size, ptr); + hsasupport_singleton.GetCoreApiTable().hsa_memory_allocate_fn(region, size, ptr); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) { hsa_evt_data_t data{}; data.allocate.ptr = *ptr; data.allocate.size = size; - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_region_get_info_fn( + if (hsasupport_singleton.GetCoreApiTable().hsa_region_get_info_fn( region, HSA_REGION_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS || - rocprofiler::hsa_support::GetCoreApiTable().hsa_region_get_info_fn( + hsasupport_singleton.GetCoreApiTable().hsa_region_get_info_fn( region, HSA_REGION_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_region_get_info failed"); @@ -268,14 +266,16 @@ hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, v hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent, hsa_access_permission_t access) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_assign_agent_fn(ptr, agent, access); + hsasupport_singleton.GetCoreApiTable().hsa_memory_assign_agent_fn(ptr, agent, access); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) { hsa_evt_data_t data{}; data.device.ptr = ptr; - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_agent_get_info_fn( + if (hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( agent, HSA_AGENT_INFO_DEVICE, &data.device.type) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_agent_get_info failed"); @@ -286,8 +286,9 @@ hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent, } hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) { - hsa_status_t status = - rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(dst, src, size); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(dst, src, size); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY)) { @@ -304,8 +305,10 @@ hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) { hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags, void** ptr) { - hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( - pool, size, flags, ptr); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = + hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr); if (size == 0 || status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) { @@ -313,9 +316,9 @@ hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size data.allocate.ptr = *ptr; data.allocate.size = size; - if (rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( + if (hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS || - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_region_get_info failed"); @@ -327,28 +330,33 @@ hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size auto callback_data = std::make_pair(pool, ptr); auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) { auto [pool, ptr] = *reinterpret_cast(iterate_agent_callback_data); - + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); if (hsa_amd_memory_pool_access_t value; - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_memory_pool_get_info_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_memory_pool_get_info_fn( agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) != HSA_STATUS_SUCCESS || value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT) return HSA_STATUS_SUCCESS; + rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(agent.handle); - auto it = agent_info_map.find(agent.handle); - if (it == agent_info_map.end()) - rocprofiler::fatal("agent was not found in the agent_info map"); hsa_evt_data_t data{}; - data.device.type = it->second.type; - data.device.id = it->second.id; + + data.device.type = static_cast(agent_info.GetType()); + if (data.device.type == HSA_DEVICE_TYPE_GPU) + data.device.id = agent_info.GetDeviceInfo().getGPUId(); + else + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, + &data.device.id); + data.device.agent = agent; data.device.ptr = ptr; ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data); return HSA_STATUS_SUCCESS; }; - rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(agent_callback, - &callback_data); + rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_iterate_agents_fn( + agent_callback, &callback_data); } return HSA_STATUS_SUCCESS; @@ -363,7 +371,9 @@ hsa_status_t MemoryPoolFreeIntercept(void* ptr) { } if (ptr) - return rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn(ptr); + return rocprofiler::HSASupport_Singleton::GetInstance() + .GetAmdExtTable() + .hsa_amd_memory_pool_free_fn(ptr); else return HSA_STATUS_SUCCESS; } @@ -371,20 +381,26 @@ hsa_status_t MemoryPoolFreeIntercept(void* ptr) { // Agent allow access callback 'hsa_amd_agents_allow_access' hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents, const uint32_t* flags, const void* ptr) { - hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( num_agents, agents, flags, ptr); if (status != HSA_STATUS_SUCCESS) return status; if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) { while (num_agents--) { hsa_agent_t agent = *agents++; - auto it = agent_info_map.find(agent.handle); - if (it == agent_info_map.end()) - rocprofiler::fatal("agent was not found in the agent_info map"); + rocprofiler::HSAAgentInfo agent_info = hsasupport_singleton.GetHSAAgentInfo(agent.handle); hsa_evt_data_t data{}; - data.device.type = it->second.type; - data.device.id = it->second.id; + data.device.type = (hsa_device_type_t)(agent_info.GetType()); + // ToDo:: Fixme the device id might not be unique across CPU and GPU. + // Along with device id, device type can be used to uniquely identify the device + if (data.device.type == HSA_DEVICE_TYPE_GPU) + data.device.id = agent_info.GetDeviceInfo().getGPUId(); + else + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_NODE, + &data.device.id); data.device.agent = agent; data.device.ptr = ptr; @@ -403,25 +419,26 @@ struct CodeObjectCallbackArg { hsa_status_t CodeObjectCallback(hsa_executable_t executable, hsa_loaded_code_object_t loaded_code_object, void* arg) { hsa_evt_data_t data{}; - - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE, &data.codeobj.storage_type) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) { - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE, &data.codeobj.storage_file) != HSA_STATUS_SUCCESS || data.codeobj.storage_file == -1) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); data.codeobj.memory_base = data.codeobj.memory_size = 0; } else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) { - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE, &data.codeobj.memory_base) != HSA_STATUS_SUCCESS || - rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE, &data.codeobj.memory_size) != HSA_STATUS_SUCCESS) @@ -434,29 +451,29 @@ hsa_status_t CodeObjectCallback(hsa_executable_t executable, rocprofiler::fatal("unknown code object storage type: %d", data.codeobj.storage_type); } - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &data.codeobj.load_base) != HSA_STATUS_SUCCESS || - rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &data.codeobj.load_size) != HSA_STATUS_SUCCESS || - rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &data.codeobj.load_delta) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &data.codeobj.uri_length) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); std::string uri_str(data.codeobj.uri_length, '\0'); - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str.data()) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); - if (rocprofiler::hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( + if (hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT, &data.codeobj.agent) != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed"); @@ -482,15 +499,16 @@ hsa_status_t CodeObjectCallback(hsa_executable_t executable, } hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_freeze_fn(executable, options); + hsasupport_singleton.GetCoreApiTable().hsa_executable_freeze_fn(executable, options); if (status != HSA_STATUS_SUCCESS) return status; // if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) { bool unload = false; - rocprofiler::hsa_support::GetHSALoaderApi() - .hsa_ven_amd_loader_executable_iterate_loaded_code_objects(executable, CodeObjectCallback, - &unload); + hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + executable, CodeObjectCallback, &unload); // } return HSA_STATUS_SUCCESS; @@ -499,19 +517,22 @@ hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) { // if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) { bool unload = true; - rocprofiler::hsa_support::GetHSALoaderApi() - .hsa_ven_amd_loader_executable_iterate_loaded_code_objects(executable, CodeObjectCallback, - &unload); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + executable, CodeObjectCallback, &unload); // } - return rocprofiler::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable); + return hsasupport_singleton.GetCoreApiTable().hsa_executable_destroy_fn(executable); } std::atomic profiling_async_copy_enable{false}; hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(enable); + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(enable); if (status == HSA_STATUS_SUCCESS) { profiling_async_copy_enable.exchange(enable, std::memory_order_release); } @@ -520,15 +541,33 @@ hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) { void MemoryASyncCopyHandler(const Tracker::entry_t* entry) { activity_record_t record{}; + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); record.domain = ACTIVITY_DOMAIN_HSA_OPS; record.op = HSA_OP_ID_COPY; record.begin_ns = entry->begin; record.end_ns = entry->end; - record.device_id = (entry->agent.handle > 0) - ? rocprofiler::hsa_support::GetAgentInfo(entry->agent.handle).getIndex() - : (entry->copy.dst_agent.handle > 0) - ? rocprofiler::hsa_support::GetAgentInfo(entry->copy.dst_agent.handle).getIndex() - : 0; + if (entry->agent.handle > 0) { + //FIXME: Not a unique id across GPU and CPU + rocprofiler::HSAAgentInfo& agent_info = + hsasupport_singleton.GetHSAAgentInfo(entry->agent.handle); + if (agent_info.GetType() == HSA_DEVICE_TYPE_GPU) + record.device_id = agent_info.GetDeviceInfo().getGPUId(); + else + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + entry->agent, HSA_AGENT_INFO_NODE, &record.device_id); + } else if (entry->copy.dst_agent.handle > 0) { + rocprofiler::HSAAgentInfo& agent_info = + hsasupport_singleton.GetHSAAgentInfo(entry->copy.dst_agent.handle); + if (agent_info.GetType() == HSA_DEVICE_TYPE_GPU) + record.device_id = agent_info.GetDeviceInfo().getGPUId(); + else + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + entry->copy.dst_agent, HSA_AGENT_INFO_NODE, &record.device_id); + } else + record.device_id = 0; + + record.correlation_id = entry->correlation_id; ReportActivity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY, &record); } @@ -538,15 +577,16 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); - + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); // FIXME: what happens if the state changes before returning? [[maybe_unused]] hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { - return rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_fn( + return hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); } @@ -557,7 +597,7 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo entry->copy.dst_agent = dst_agent; Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -572,15 +612,16 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); - + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); // FIXME: what happens if the state changes before returning? [[maybe_unused]] hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { - return rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_rect_fn( + return hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_rect_fn( dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, completion_signal); } @@ -591,7 +632,7 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, entry->agent = copy_agent; Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_rect_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_rect_fn( dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -604,14 +645,16 @@ hsa_status_t MemoryASyncCopyOnEngineIntercept( uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal, hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) { bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); - + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); // FIXME: what happens if the state changes before returning? - [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn( - profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); + [[maybe_unused]] hsa_status_t status = + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( + profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { - return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( + return hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_on_engine_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal, engine_id, force_copy_on_sdma); } @@ -623,7 +666,7 @@ hsa_status_t MemoryASyncCopyOnEngineIntercept( entry->copy.dst_agent = dst_agent; Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_async_copy_on_engine_fn( dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id, force_copy_on_sdma); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -633,74 +676,6 @@ hsa_status_t MemoryASyncCopyOnEngineIntercept( } // namespace -rocprofiler_timestamp_t timestamp_ns() { - // If the HSA intercept is installed, then use the "original" - // 'hsa_system_get_info' function to avoid reporting calls for internal use - // of the HSA API by the tracer. - auto hsa_system_get_info_fn = rocprofiler::hsa_support::GetCoreApiTable().hsa_system_get_info_fn; - - // If the HSA intercept is not installed, use the default - // 'hsa_system_get_info'. - if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info; - - uint64_t sysclock; - if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); - status == HSA_STATUS_ERROR_NOT_INITIALIZED) - return rocprofiler_timestamp_t{0}; - else if (status != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_system_get_info failed"); - - static uint64_t sysclock_period = [&]() { - uint64_t sysclock_hz = 0; - if (hsa_status_t status = - hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); - status != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_system_get_info failed"); - - return (uint64_t)1000000000 / sysclock_hz; - }(); - - return rocprofiler_timestamp_t{sysclock * sysclock_period}; -} - -void Initialize_roctracer(HsaApiTable* table) { - // Save the HSA core api and amd_ext api. - saved_core_api = rocprofiler::hsa_support::GetCoreApiTable(); - saved_amd_ext_api = rocprofiler::hsa_support::GetAmdExtTable(); - hsa_loader_api = rocprofiler::hsa_support::GetHSALoaderApi(); - - // Enumerate the agents. - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - hsa_support::AgentInfo agent_info; - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_agent_get_info_fn( - agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info failed"); - switch (agent_info.type) { - case HSA_DEVICE_TYPE_CPU: - static int cpu_agent_count = 0; - agent_info.id = cpu_agent_count++; - break; - case HSA_DEVICE_TYPE_GPU: { - uint32_t driver_node_id; - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), - &driver_node_id) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info failed"); - - agent_info.id = driver_node_id; - } break; - default: - static int other_agent_count = 0; - agent_info.id = other_agent_count++; - break; - } - hsa_support::agent_info_map.emplace(agent.handle, agent_info); - return HSA_STATUS_SUCCESS; - }, - nullptr) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_iterate_agents failed"); -} const char* GetApiName(uint32_t id) { return detail::GetApiName(id); } @@ -749,12 +724,118 @@ void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t o namespace rocprofiler { -namespace hsa_support { -hsa_agent_t cpu_agent; -std::map> queues; -std::atomic active_queues{0}; +std::atomic cpu_agent; + +HSASupport_Singleton& HSASupport_Singleton::GetInstance() { + static HSASupport_Singleton* instance = new HSASupport_Singleton; + return *instance; +} + +CoreApiTable& HSASupport_Singleton::GetCoreApiTable() { return saved_core_api; } + +void HSASupport_Singleton::SetCoreApiTable(CoreApiTable& table) { saved_core_api = table; } + +AmdExtTable& HSASupport_Singleton::GetAmdExtTable() { return saved_amd_ext_api; } + +void HSASupport_Singleton::SetAmdExtTable(AmdExtTable& table) { saved_amd_ext_api = table; } + +hsa_ven_amd_loader_1_01_pfn_t& HSASupport_Singleton::GetHSALoaderApi() { return hsa_loader_api; } + +void HSASupport_Singleton::SetHSALoaderApi() { + hsa_status_t status = GetCoreApiTable().hsa_system_get_major_extension_table_fn( + HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api); + if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed"); +} + + +const Agent::DeviceInfo& HSAAgentInfo::GetDeviceInfo() const { + if (type_ == HSA_DEVICE_TYPE_GPU) + return device_info_; + assert("Attempting to read deviceInfo for a CPU agent"); +} + +uint64_t HSAAgentInfo::getHandle() const { return agent_.handle; } + +hsa_agent_t HSAAgentInfo::GetNearCpuAgent() const { return near_cpu_agent_; } +hsa_device_type_t HSAAgentInfo::GetType() const { return type_; } +void HSAAgentInfo::SetNearCpuAgent(hsa_agent_t near_cpu_agent) { near_cpu_agent_ = near_cpu_agent; } + +void HSAAgentInfo::SetDeviceInfo(Agent::DeviceInfo device_info) { device_info_ = device_info; } + +HSAAgentInfo& HSASupport_Singleton::GetHSAAgentInfo(uint64_t agent_handle) { + std::lock_guard info_map_lock(info_map_mutex_); + auto it = HSAagent_info_map_.find(agent_handle); + if (it == HSAagent_info_map_.end()) + rocprofiler::fatal("HSA AgentInfo is not found for the given handle:%ld", agent_handle); + return it->second; +} + +HSAAgentInfo& HSASupport_Singleton::GetHSAAgentInfo(Agent::DeviceInfo device_info) { + std::lock_guard info_map_lock(info_map_mutex_); + for (auto it = HSAagent_info_map_.begin(); it != HSAagent_info_map_.end(); it++) { + uint64_t gpuid = it->second.GetDeviceInfo().getGPUId(); + if (gpuid == device_info.getGPUId()) { + return it->second; + } + } + rocprofiler::fatal("HSA AgentInfo is not found for the given device with uuid %lu", + device_info.getGPUId()); +} + +void HSASupport_Singleton::SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info) { + std::lock_guard info_map_lock(info_map_mutex_); + HSAagent_info_map_.emplace(agent.handle, hsa_agent_info); +} + +void HSASupport_Singleton::InitKsymbols() { + if (ksymbols_flag.load(std::memory_order_relaxed)) { + { + std::lock_guard lock(ksymbol_map_lock); + ksymbols = new std::map(); + ksymbols_flag.exchange(false, std::memory_order_release); + } + { + std::lock_guard lock(kernel_names_map_lock); + kernel_names = new std::map>(); + kernel_names_flag.exchange(false, std::memory_order_release); + } + } +} +void HSASupport_Singleton::FinitKsymbols() { + if (!ksymbols_flag.load(std::memory_order_relaxed)) { + std::lock_guard lock(ksymbol_map_lock); + ksymbols->clear(); + delete ksymbols; + ksymbols_flag.exchange(true, std::memory_order_release); + } + if (!kernel_names_flag.load(std::memory_order_relaxed)) { + std::lock_guard lock(kernel_names_map_lock); + kernel_names->clear(); + delete kernel_names; + kernel_names_flag.exchange(true, std::memory_order_release); + } +} + + + +void queues_deleter ::operator()(void* queue) const { delete static_cast(queue); } + + + void HSASupport_Singleton::AddQueue(hsa_queue_t* queue, std::unique_ptrrocprofiler_queue) { + std::lock_guard queues_mutex_lock(queues_mutex_); + queues.emplace(queue, std::move(rocprofiler_queue)); +} + +void HSASupport_Singleton::RemoveQueue(hsa_queue_t* queue) { + std::lock_guard queues_mutex_lock(queues_mutex_); + auto it = queues.find(queue); + if (it == queues.end()) { + fatal("Trying to destroy a non-existent queue in the profiler"); + } + queues.erase(it); +} /** * @brief This function is a queue create interceptor. It intercepts the queue * creation, registers the profiler, and registers a packet write interceptor. @@ -767,12 +848,25 @@ hsa_status_t QueueCreateInterceptor(hsa_agent_t agent, uint32_t size, hsa_queue_ void* data, uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue) { // TODO(aelwazir): Queue ID - static std::mutex qc_mutex; - std::lock_guard lk(qc_mutex); - queues.emplace(active_queues.fetch_add(1, std::memory_order_release), - std::make_unique(cpu_agent, agent, size, type, callback, data, - private_segment_size, group_segment_size, queue)); + HSASupport_Singleton& instance = HSASupport_Singleton::GetInstance(); + + queues_deleter deleter; + hsa_status_t status = instance.GetAmdExtTable().hsa_amd_queue_intercept_create_fn( + agent, size, type, callback, data, private_segment_size, group_segment_size, queue); + + if (status != HSA_STATUS_SUCCESS) return status; + + status = instance.GetAmdExtTable().hsa_amd_profiling_set_profiler_enabled_fn(*queue, true); + if (status != HSA_STATUS_SUCCESS) fatal("Failed to enable the profiling on the queue"); + + std::unique_ptr rocprofiler_queue( + new queue::Queue(cpu_agent.load(std::memory_order_relaxed), agent, *queue), deleter); + + status = instance.GetAmdExtTable().hsa_amd_queue_intercept_register_fn( + *queue, queue::Queue::WriteInterceptor, rocprofiler_queue.get()); + if (status != HSA_STATUS_SUCCESS) fatal("Failed to regiter write interceptor for the queue"); + instance.AddQueue(*queue, std::move(rocprofiler_queue)); return HSA_STATUS_SUCCESS; } @@ -783,214 +877,20 @@ hsa_status_t QueueCreateInterceptor(hsa_agent_t agent, uint32_t size, hsa_queue_ **/ hsa_status_t QueueDestroyInterceptor(hsa_queue_t* hsa_queue) { - static std::mutex qd_mutex; - std::lock_guard lk(qd_mutex); - ASSERTM(GetCoreApiTable().hsa_queue_destroy_fn(hsa_queue) == HSA_STATUS_SUCCESS, - "Queue couldn't be destroyed!"); - queues.erase(active_queues.fetch_sub(1, std::memory_order_release)); + HSASupport_Singleton& instance = HSASupport_Singleton::GetInstance(); + hsa_status_t status = instance.GetCoreApiTable().hsa_queue_destroy_fn(hsa_queue); + if (status != HSA_STATUS_SUCCESS) return status; + instance.RemoveQueue(hsa_queue); return HSA_STATUS_SUCCESS; } +bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) { -std::unordered_map numa_node_to_cpu_agent; -std::unordered_map gpu_numa_nodes_near_cpu; -std::vector gpu_agents; - -void Initialize(HsaApiTable* table) { - InitKsymbols(); - // Save the HSA core api and amd_ext api. - long long gpu_numa_nodes_start = 0; - - SetCoreApiTable(*table->core_); - SetAmdExtTable(table->amd_ext_); - - // TODO(aelwazir): FIXME, this is a workaround for the issue of allocating buffers on KernArg - // Pools that are nearest to the GPU which is not NUMA local to the CPU. This should be remove - // once ROCR provides such API. - - std::string path = "/sys/class/kfd/kfd/topology/nodes"; - for (const auto& entry : fs::directory_iterator(path)) { - long long node_id = std::stoll(entry.path().filename().c_str()); - std::ifstream gpu_id_file; - std::string gpu_path = entry.path().c_str(); - gpu_path += "/gpu_id"; - gpu_id_file.open(gpu_path); - std::string gpu_id_str; - if (gpu_id_file.is_open()) { - gpu_id_file >> gpu_id_str; - - if (!gpu_id_str.empty()) { - long long gpu_id = std::stoll(gpu_id_str); - if (gpu_id > 0) { - gpu_numa_nodes_start = (gpu_numa_nodes_start > node_id || gpu_numa_nodes_start == 0) - ? node_id - : gpu_numa_nodes_start; - } - } - } - gpu_id_file.close(); + static std::map metricsDicts; + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + for(auto it = hsasupport_singleton.gpu_agents.begin(); it != hsasupport_singleton.gpu_agents.end(); it++) { + HSAAgentInfo& agent_Info = hsasupport_singleton.GetHSAAgentInfo(it->handle); + metricsDicts.emplace(agent_Info.getHandle(), rocprofiler::MetricsDict::Create(&agent_Info)) ; } - path = "/sys/class/kfd/kfd/topology/nodes"; - for (const auto& entry : fs::directory_iterator(path)) { - long long node_id = std::stoll(entry.path().filename().c_str()); - std::string numa_node_path = entry.path().c_str(); - long long agent_id = std::stoll(entry.path().filename().c_str()); - if (agent_id >= gpu_numa_nodes_start) { - numa_node_path += "/io_links"; - for (const auto& numa_node_entry : fs::directory_iterator(numa_node_path)) { - std::string numa_node_entry_properties_path = numa_node_entry.path().c_str(); - numa_node_entry_properties_path += "/properties"; - std::ifstream gpu_properties_file; - gpu_properties_file.open(numa_node_entry_properties_path); - std::string gpu_properties_file_line; - if (gpu_properties_file.is_open()) { - while (gpu_properties_file) { - std::getline(gpu_properties_file, gpu_properties_file_line); - std::string delimiter = " "; - std::stringstream ss(gpu_properties_file_line); - std::string word; - ss >> word; - if (word.compare("node_to") == 0) { - ss >> word; - long long near_cpu_node_id = std::stoll(word); - if (near_cpu_node_id < gpu_numa_nodes_start) { - gpu_numa_nodes_near_cpu[node_id] = near_cpu_node_id; - } - } - } - } - gpu_properties_file.close(); - } - } - } - - // Enumerate the agents. - if (GetCoreApiTable().hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - Agent::AgentInfo agent_info{agent, &GetCoreApiTable()}; - static int cpu_agent_count = 0; - static int other_agent_count = 0; - switch (agent_info.getType()) { - case HSA_DEVICE_TYPE_CPU: - agent_info.setIndex(cpu_agent_count++); - cpu_agent = agent; - rocprofiler::queue::InitializePools(cpu_agent, &agent_info); - uint32_t cpu_numa_node_id; - // Change into KFD GPU ID - if (GetCoreApiTable().hsa_agent_get_info_fn( - agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info(HSA_AGENT_INFO_NODE) failed"); - agent_info.setNumaNode(cpu_numa_node_id); - numa_node_to_cpu_agent[cpu_numa_node_id] = agent; - break; - case HSA_DEVICE_TYPE_GPU: - // TODO(FIXME): When multiple ranks are used, each rank's first - // logical device always has GPU ID 0, regardless of which - // physical device is selected with CUDA_VISIBLE_DEVICES. - // Because of this, when merging traces from multiple ranks, - // GPU IDs from different processes may overlap. - // - // The long term solution is to use KFD's gpu_id, which is - // stable across APIs and processes, but it isn't currently - // exposed by ROCr. We could use the agent's - // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even - // that would be an improvement--it's what legacy roctracer - // is currently doing as well as the roctracer compatibility - // code earlier in this file. - uint32_t driver_node_id; - if (rocprofiler::hsa_support::GetCoreApiTable().hsa_agent_get_info_fn( - agent, static_cast(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), - &driver_node_id) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info failed"); - agent_info.setIndex(driver_node_id); - uint32_t gpu_cpu_numa_node_id; - if (GetCoreApiTable().hsa_agent_get_info_fn( - agent, HSA_AGENT_INFO_NODE, &gpu_cpu_numa_node_id) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_agent_get_info(HSA_AGENT_INFO_NODE) failed"); - agent_info.setNumaNode(gpu_cpu_numa_node_id); - agent_info.setNearCpuAgent( - numa_node_to_cpu_agent[gpu_numa_nodes_near_cpu[gpu_cpu_numa_node_id]]); - rocprofiler::queue::InitializeGPUPool(agent, &agent_info); - gpu_agents.push_back(agent); - break; - default: - agent_info.setIndex(other_agent_count++); - break; - } - SetAgentInfo(agent.handle, agent_info); - return HSA_STATUS_SUCCESS; - }, - nullptr) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_iterate_agents failed"); - - for (auto& agent : gpu_agents) { - GetAgentInfo(agent.handle).cpu_pool = - GetAgentInfo(GetAgentInfo(agent.handle).getNearCpuAgent().handle).cpu_pool; - GetAgentInfo(agent.handle).kernarg_pool = - GetAgentInfo(GetAgentInfo(agent.handle).getNearCpuAgent().handle).kernarg_pool; - } - - rocprofiler::queue::CheckPacketReqiurements(gpu_agents); - - gpu_agents.clear(); - numa_node_to_cpu_agent.clear(); - gpu_numa_nodes_near_cpu.clear(); - - SetHSALoaderApi(); - - roctracer::hsa_support::Initialize_roctracer(table); - - // Install the Queue intercept - table->core_->hsa_queue_create_fn = QueueCreateInterceptor; - table->core_->hsa_queue_destroy_fn = QueueDestroyInterceptor; - - // Install the HSA_OPS intercept - table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_support::MemoryASyncCopyIntercept; - table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = - roctracer::hsa_support::MemoryASyncCopyRectIntercept; - table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn = - roctracer::hsa_support::ProfilingAsyncCopyEnableIntercept; - table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn = - roctracer::hsa_support::MemoryASyncCopyOnEngineIntercept; - - // Install the HSA_EVT intercept - table->core_->hsa_memory_allocate_fn = roctracer::hsa_support::MemoryAllocateIntercept; - table->core_->hsa_memory_assign_agent_fn = roctracer::hsa_support::MemoryAssignAgentIntercept; - table->core_->hsa_memory_copy_fn = roctracer::hsa_support::MemoryCopyIntercept; - table->amd_ext_->hsa_amd_memory_pool_allocate_fn = - roctracer::hsa_support::MemoryPoolAllocateIntercept; - table->amd_ext_->hsa_amd_memory_pool_free_fn = roctracer::hsa_support::MemoryPoolFreeIntercept; - table->amd_ext_->hsa_amd_agents_allow_access_fn = - roctracer::hsa_support::AgentsAllowAccessIntercept; - table->core_->hsa_executable_freeze_fn = roctracer::hsa_support::ExecutableFreezeIntercept; - table->core_->hsa_executable_destroy_fn = roctracer::hsa_support::ExecutableDestroyIntercept; - - // Install the HSA_API wrappers - roctracer::hsa_support::detail::InstallCoreApiWrappers(table->core_); - roctracer::hsa_support::detail::InstallAmdExtWrappers(table->amd_ext_); - roctracer::hsa_support::detail::InstallImageExtWrappers(table->image_ext_); -} - -void Finalize() { - while (active_queues.load(std::memory_order_relaxed) != 0) { - } - - // FinitKsymbols(); - ResetMaps(); -} - -static std::map metricsDicts; - -bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) { - if (GetCoreApiTable().hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* data) { - Agent::AgentInfo agent_info{agent, &GetCoreApiTable()}; - if (agent_info.getType() == HSA_DEVICE_TYPE_GPU) { - metricsDicts.emplace(agent.handle, rocprofiler::MetricsDict::Create(&agent_info)); - } - return HSA_STATUS_SUCCESS; - }, - nullptr) != HSA_STATUS_SUCCESS) - rocprofiler::fatal("hsa_iterate_agents failed"); uint32_t gpu_counter = 0; for (auto metricsDictAgent : metricsDicts) { rocprofiler::MetricsDict* metricsDict = metricsDictAgent.second; @@ -1061,8 +961,127 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback // } } - return true; + return true; } -} // namespace hsa_support + + + +void HSASupport_Singleton::HSAInitialize(HsaApiTable* table) { + InitKsymbols(); + // Save the HSA core api and amd_ext api. + + SetCoreApiTable(*table->core_); + SetAmdExtTable(*table->amd_ext_); + + // TODO(aelwazir): FIXME, this is a workaround for the issue of allocating buffers on KernArg + // Pools that are nearest to the GPU which is not NUMA local to the CPU. This should be remove + // once ROCR provides such API. + + // Enumerate the agents. + if (GetCoreApiTable().hsa_iterate_agents_fn( + [](hsa_agent_t agent, void* data) { + ROCProfiler_Singleton& rocprofiler_instance = ROCProfiler_Singleton::GetInstance(); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + hsa_device_type_t device_type; + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + agent, HSA_AGENT_INFO_DEVICE, &device_type); + switch (device_type) { + case HSA_DEVICE_TYPE_CPU: { + // FixMe: Multiprocess CPU for eg: in NUMA architecture + cpu_agent = agent; + rocprofiler::HSAAgentInfo agent_info(agent, device_type); + Packet::InitializePools(cpu_agent, &agent_info); + hsasupport_singleton.SetHSAAgentInfo(agent, agent_info); + break; + } + case HSA_DEVICE_TYPE_GPU: { + // TODO(FIXME): When multiple ranks are used, each rank's first + // logical device always has GPU ID 0, regardless of which + // physical device is selected with CUDA_VISIBLE_DEVICES. + // Because of this, when merging traces from multiple ranks, + // GPU IDs from different processes may overlap. + // + // The long term solution is to use KFD's gpu_id, which is + // stable across APIs and processes, but it isn't currently + // exposed by ROCr. We could use the agent's + // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even + // that would be an improvement--it's what legacy roctracer + // is currently doing as well as the roctracer compatibility + // code earlier in this file. + uint32_t gpu_id = 0; + + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + agent, (hsa_agent_info_t)(HSA_AMD_AGENT_INFO_DRIVER_UID), &gpu_id); + const Agent::DeviceInfo& device_info = rocprofiler_instance.GetDeviceInfo(gpu_id); + hsa_agent_t nearCpuAgent; + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + agent, (hsa_agent_info_t)(HSA_AMD_AGENT_INFO_NEAREST_CPU), &nearCpuAgent); + rocprofiler::HSAAgentInfo agent_info(agent, device_type); + agent_info.SetNearCpuAgent(nearCpuAgent); + agent_info.SetDeviceInfo(device_info); + Packet::InitializeGPUPool(agent, &agent_info); + hsasupport_singleton.SetHSAAgentInfo(agent, agent_info); + hsasupport_singleton.gpu_agents.push_back(agent); + break; + } + default: + break; + } + + return HSA_STATUS_SUCCESS; + }, + nullptr) != HSA_STATUS_SUCCESS) + rocprofiler::fatal("hsa_iterate_agents failed"); + + for (auto& agent : gpu_agents) { + HSAAgentInfo& agent_info = GetHSAAgentInfo(agent.handle); + hsa_agent_t near_cpu_node = agent_info.GetNearCpuAgent(); + HSAAgentInfo& near_cpu_agent_info = GetHSAAgentInfo(near_cpu_node.handle); + agent_info.cpu_pool_ = near_cpu_agent_info.cpu_pool_; + agent_info.kernarg_pool_ = near_cpu_agent_info.kernarg_pool_; + } + + rocprofiler::queue::CheckPacketReqiurements(); + SetHSALoaderApi(); + + // Install the Queue intercept + table->core_->hsa_queue_create_fn = QueueCreateInterceptor; + table->core_->hsa_queue_destroy_fn = QueueDestroyInterceptor; + + // Install the HSA_OPS intercept + table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_support::MemoryASyncCopyIntercept; + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = + roctracer::hsa_support::MemoryASyncCopyRectIntercept; + table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn = + roctracer::hsa_support::ProfilingAsyncCopyEnableIntercept; + table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn = + roctracer::hsa_support::MemoryASyncCopyOnEngineIntercept; + + // Install the HSA_EVT intercept + table->core_->hsa_memory_allocate_fn = roctracer::hsa_support::MemoryAllocateIntercept; + table->core_->hsa_memory_assign_agent_fn = roctracer::hsa_support::MemoryAssignAgentIntercept; + table->core_->hsa_memory_copy_fn = roctracer::hsa_support::MemoryCopyIntercept; + table->amd_ext_->hsa_amd_memory_pool_allocate_fn = + roctracer::hsa_support::MemoryPoolAllocateIntercept; + table->amd_ext_->hsa_amd_memory_pool_free_fn = roctracer::hsa_support::MemoryPoolFreeIntercept; + table->amd_ext_->hsa_amd_agents_allow_access_fn = + roctracer::hsa_support::AgentsAllowAccessIntercept; + table->core_->hsa_executable_freeze_fn = roctracer::hsa_support::ExecutableFreezeIntercept; + table->core_->hsa_executable_destroy_fn = roctracer::hsa_support::ExecutableDestroyIntercept; + + // Install the HSA_API wrappers + roctracer::hsa_support::detail::InstallCoreApiWrappers(table->core_); + roctracer::hsa_support::detail::InstallAmdExtWrappers(table->amd_ext_); + roctracer::hsa_support::detail::InstallImageExtWrappers(table->image_ext_); +} + +void HSASupport_Singleton::HSAFinalize() { + std::lock_guard queues_mutex_lock(queues_mutex_); + queues.clear(); + // table gets reset by rocr runtime + FinitKsymbols(); +} + + } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/hsa/hsa_support.h b/projects/rocprofiler/src/core/hsa/hsa_support.h index b209c04a2e..6d97360e03 100644 --- a/projects/rocprofiler/src/core/hsa/hsa_support.h +++ b/projects/rocprofiler/src/core/hsa/hsa_support.h @@ -29,8 +29,10 @@ #include #include +#include +#include -#include "hsa_common.h" +#include "rocprofiler.h" #include "src/core/hardware/hsa_info.h" // HSA EVT data type @@ -92,15 +94,78 @@ typedef struct { namespace rocprofiler { -namespace hsa_support { -void Initialize(HsaApiTable* Table); -hsa_status_t hsa_iterate_agents_cb(hsa_agent_t agent, void* data); -void Finalize(); +class HSAAgentInfo { + private: + hsa_agent_t agent_; + Agent::DeviceInfo device_info_; + hsa_agent_t near_cpu_agent_; + hsa_device_type_t type_; -bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback); -} // namespace hsa_support + public: + HSAAgentInfo(hsa_agent_t agent, hsa_device_type_t type) : agent_(agent), type_(type){}; + uint64_t getHandle() const; + const Agent::DeviceInfo& GetDeviceInfo() const; + void SetNearCpuAgent(hsa_agent_t near_cpu_agent); + void SetDeviceInfo(Agent::DeviceInfo device_info); + hsa_agent_t GetNearCpuAgent() const; + hsa_device_type_t GetType() const; + hsa_amd_memory_pool_t cpu_pool_; + hsa_amd_memory_pool_t kernarg_pool_; + hsa_amd_memory_pool_t gpu_pool_; +}; + + +struct queues_deleter { + queues_deleter() {}; + queues_deleter(queues_deleter&) { }; + void operator() (void * queue) const; +}; + +class HSASupport_Singleton { + private: + HSASupport_Singleton() {}; + ~HSASupport_Singleton() = delete; + CoreApiTable saved_core_api; + AmdExtTable saved_amd_ext_api; + hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api; + std::mutex info_map_mutex_; + std::unordered_map HSAagent_info_map_; + std::atomic ksymbols_flag{true}; + std::atomic kernel_names_flag{true}; + std::mutex queues_mutex_; + std::unordered_map> queues; + void SetCoreApiTable(CoreApiTable& table); + void SetAmdExtTable(AmdExtTable& table); + void SetHSALoaderApi(); + + public: + std::vector gpu_agents; + HSAAgentInfo& GetHSAAgentInfo(uint64_t agent_handle); + HSAAgentInfo& GetHSAAgentInfo(Agent::DeviceInfo device_info); + Agent::DeviceInfo& GetDeviceInfo(HSAAgentInfo* agent_info); + std::mutex kernel_names_map_lock; + std::map>* kernel_names; + std::mutex ksymbol_map_lock; + std::map* ksymbols; + void SetHSAAgentInfo(hsa_agent_t agent, HSAAgentInfo hsa_agent_info); + static HSASupport_Singleton& GetInstance(); + CoreApiTable& GetCoreApiTable(); + AmdExtTable& GetAmdExtTable(); + hsa_ven_amd_loader_1_01_pfn_t& GetHSALoaderApi(); + void AddQueue(hsa_queue_t* queue, std::unique_ptr); + void RemoveQueue(hsa_queue_t* queue); + void HSAInitialize(HsaApiTable* Table); + void HSAFinalize(); + void InitKsymbols(); + void FinitKsymbols(); + HSASupport_Singleton(const HSASupport_Singleton&) = delete; + HSASupport_Singleton& operator=(const HSASupport_Singleton&) = delete; + +}; + +bool hsa_support_IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback); } // namespace rocprofiler #include "src/core/session/tracer/src/roctracer.h" @@ -126,11 +191,6 @@ uint32_t GetApiCode(const char* str); void RegisterTracerCallback(int (*function)(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id, void* data)); -rocprofiler_timestamp_t timestamp_ns(); - -void Initialize_roctracer(HsaApiTable* table); - - } // namespace roctracer::hsa_support #endif // SRC_CORE_HSA_HSA_SUPPORT_H_ diff --git a/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp b/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp index 083ace9596..20cb11e1d1 100644 --- a/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp +++ b/projects/rocprofiler/src/core/hsa/packets/packets_generator.cpp @@ -40,7 +40,6 @@ #include "src/core/counters/basic/basic_counter.h" #include "src/utils/exception.h" #include "src/utils/logger.h" -#include "src/core/hsa/hsa_common.h" #include "src/core/counters/metrics/metrics.h" #include "src/core/hardware/hsa_info.h" @@ -83,13 +82,14 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool if (nullptr == data) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info"); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( + err = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_get_info_fn( pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); ASSERTM(err != HSA_STATUS_ERROR, "hsa_amd_memory_pool_get_info"); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; @@ -114,20 +114,22 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) { return FindGlobalPool(pool, data, true); } -void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) { +void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn( - cpu_agent, FindStandardPool, &(agent_info->cpu_pool)); + hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn( + cpu_agent, FindStandardPool, &(agent_info->cpu_pool_)); CHECK_HSA_STATUS("Error: Command Buffer Pool is not initialized", status); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn( - cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool)); + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn( + cpu_agent, FindKernArgPool, &(agent_info->kernarg_pool_)); CHECK_HSA_STATUS("Error: Output Buffer Pool is not initialized", status); } -void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) { +void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); hsa_status_t status = - hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, &(agent_info->gpu_pool)); + hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, &(agent_info->gpu_pool_)); CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status); } @@ -136,13 +138,18 @@ struct block_des_t { uint32_t index; }; -std::map metricsDict; + static std::atomic counters_added{false}; -void CheckPacketReqiurements(std::vector& gpu_agents) { - for (auto& gpu_agent : gpu_agents) { + +std::map metricsDict; + + +void CheckPacketReqiurements() { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + for (auto& gpu_agent : hsasupport_singleton.gpu_agents) { // get the instance of MetricsDict - Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle); + rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle); metricsDict[gpu_agent.handle] = rocprofiler::MetricsDict::Create(&agentInfo); } } @@ -155,18 +162,18 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, std::vector& counter_names, rocprofiler_session_id_t session_id, bool is_spm) { hsa_status_t status = HSA_STATUS_SUCCESS; - + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); if (!counters_added.load(std::memory_order_acquire)) { for (auto& name : counter_names) { - rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id) - ->GetProfiler() - ->AddCounterName(name); + if (rocprofiler_singleton.HasActiveSession()) { + rocprofiler_singleton.GetSession(session_id)->GetProfiler()->AddCounterName(name); + } } counters_added.exchange(true, std::memory_order_release); } - Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle); + rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle); std::map results_map; std::vector events_list; std::vector results_list; @@ -330,8 +337,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, << "Error: Command buffer given size is " << size << std::endl; abort(); } - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( - agentInfo.cpu_pool, size, 0, reinterpret_cast(&(profile->command_buffer.ptr))); + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( + agentInfo.cpu_pool_, size, 0, reinterpret_cast(&(profile->command_buffer.ptr))); if (status != HSA_STATUS_SUCCESS) { profile->command_buffer.ptr = malloc(size); /*numa_alloc_onnode( @@ -344,11 +351,10 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, } } else { // Both the CPU and GPU can access the memory - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( ag_list_count, ag_list, NULL, profile->command_buffer.ptr); CHECK_HSA_STATUS("Error: Allowing access to Command Buffer", status); } - if (!is_spm) { status = HSA_STATUS_ERROR; size_t size = profile->output_buffer.size; @@ -358,8 +364,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, << "Error: Output buffer given size is " << size << std::endl; abort(); } - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( - agentInfo.kernarg_pool, size, 0, reinterpret_cast(&profile->output_buffer.ptr)); + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( + agentInfo.kernarg_pool_, size, 0, reinterpret_cast(&profile->output_buffer.ptr)); if (status != HSA_STATUS_SUCCESS) { profile->output_buffer.ptr = malloc(size); /*numa_alloc_onnode( @@ -372,7 +378,7 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, abort(); } } else { - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( ag_list_count, ag_list, NULL, profile->output_buffer.ptr); CHECK_HSA_STATUS("Error: GPU Agent can't have output buffer access", status); memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size); @@ -420,29 +426,34 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets( // Preparing an Getting the size of the command and output buffers status = hsa_ven_amd_aqlprofile_start(profile, NULL); - - Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + rocprofiler::HSAAgentInfo& agentInfo = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle); size_t ag_list_count = 1; hsa_agent_t ag_list[ag_list_count]; ag_list[0] = gpu_agent; // Allocating Command Buffer + //FixMe: Command buffer and output buffers are allocated repetatively. status = HSA_STATUS_ERROR; size_t size = profile->command_buffer.size; profile->command_buffer.ptr = nullptr; if (size <= 0) return nullptr; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( - agentInfo.cpu_pool, size, 0, reinterpret_cast(&(profile->command_buffer.ptr))); + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( + agentInfo.cpu_pool_, size, 0, reinterpret_cast(&(profile->command_buffer.ptr))); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( ag_list_count, ag_list, NULL, profile->command_buffer.ptr); CHECK_HSA_STATUS("Error: GPU Agent can't have command buffer access", status); } else { + hsa_agent_t near_cpu_node = agentInfo.GetNearCpuAgent(); + uint32_t near_cpu_node_id = 0; + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(near_cpu_node, + HSA_AGENT_INFO_NODE, &near_cpu_node_id); profile->command_buffer.ptr = numa_alloc_onnode( profile->command_buffer.size, - rocprofiler::hsa_support::GetAgentInfo(agentInfo.getNearCpuAgent().handle).getNumaNode()); + near_cpu_node_id); if (profile->command_buffer.ptr != nullptr) { status = HSA_STATUS_SUCCESS; } else { @@ -455,12 +466,12 @@ hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets( size = profile->output_buffer.size; profile->output_buffer.ptr = nullptr; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( - agentInfo.gpu_pool, size, 0, reinterpret_cast(&(profile->output_buffer.ptr))); + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( + agentInfo.gpu_pool_, size, 0, reinterpret_cast(&(profile->output_buffer.ptr))); CHECK_HSA_STATUS("Error: Can't Allocate Output Buffer", status); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( ag_list_count, ag_list, NULL, profile->output_buffer.ptr); CHECK_HSA_STATUS("Error: Can't allow access on the Output Buffer for the GPU", status); memset(profile->output_buffer.ptr, 0x0, profile->output_buffer.size); @@ -490,11 +501,12 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn( *cpu_pool, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_agents_allow_access_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agents_allow_access_fn( ag_list_count, ag_list, NULL, buffer); } uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; @@ -504,32 +516,33 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po // Allocate memory for use by a kernel of specified size uint8_t* AllocateLocalMemory(size_t size, hsa_amd_memory_pool_t* gpu_pool) { hsa_status_t status = HSA_STATUS_ERROR; + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(*gpu_pool, size, 0, reinterpret_cast(&buffer)); + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_allocate_fn(*gpu_pool, size, 0, reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } -hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile, - size_t att_buffer_size) { - Agent::AgentInfo& agentInfo = rocprofiler::hsa_support::GetAgentInfo(gpu_agent.handle); +hsa_status_t Allocate(hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_profile_t* profile, size_t att_buffer_size) { + rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent.handle); profile->command_buffer.ptr = - AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool); + AllocateSysMemory(gpu_agent, profile->command_buffer.size, &agentInfo.cpu_pool_); profile->output_buffer.size = att_buffer_size; profile->output_buffer.ptr = (g_output_buffer_local) - ? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool) - : AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool); + ? AllocateLocalMemory(profile->output_buffer.size, &agentInfo.gpu_pool_) + : AllocateSysMemory(gpu_agent, profile->output_buffer.size, &agentInfo.cpu_pool_); return (profile->command_buffer.ptr && profile->output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; } bool AllocateMemoryPools(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, hsa_amd_memory_pool_t* cpu_pool, hsa_amd_memory_pool_t* gpu_pool) { - hsa_status_t status = hsa_amd_agent_iterate_memory_pools(cpu_agent, FindStandardPool, cpu_pool); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(cpu_agent, FindStandardPool, cpu_pool); CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(cpu_pool)", status); - status = hsa_amd_agent_iterate_memory_pools(gpu_agent, FindStandardPool, gpu_pool); + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_agent_iterate_memory_pools_fn(gpu_agent, FindStandardPool, gpu_pool); CHECK_HSA_STATUS("hsa_amd_agent_iterate_memory_pools(gpu_pool)", status); return true; diff --git a/projects/rocprofiler/src/core/hsa/packets/packets_generator.h b/projects/rocprofiler/src/core/hsa/packets/packets_generator.h index b2aa5a893a..866fef3544 100644 --- a/projects/rocprofiler/src/core/hsa/packets/packets_generator.h +++ b/projects/rocprofiler/src/core/hsa/packets/packets_generator.h @@ -46,8 +46,8 @@ InitializeAqlPackets(hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool); void GetCommandBufferMap(std::map); void GetOutputBufferMap(std::map); -void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info); -void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info); +void InitializePools(hsa_agent_t cpu_agent, rocprofiler::HSAAgentInfo* agent_info); +void InitializeGPUPool(hsa_agent_t gpu_agent, rocprofiler::HSAAgentInfo* agent_info); hsa_ven_amd_aqlprofile_profile_t* InitializeDeviceProfilingAqlPackets( hsa_agent_t cpu_agent, hsa_agent_t gpu_agent, hsa_ven_amd_aqlprofile_event_t* events, uint32_t event_count, packet_t* start_packet, packet_t* stop_packet, packet_t* read_packet); @@ -65,8 +65,7 @@ uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_po void get_command_buffer_map(std::map); void get_outbuffer_map(std::map); -void initialize_pools(hsa_agent_t cpu_agent); -void CheckPacketReqiurements(std::vector& gpu_agents); +void CheckPacketReqiurements(); typedef struct { hsa_amd_memory_pool_t cpu_mem_pool; diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.cpp b/projects/rocprofiler/src/core/hsa/queues/queue.cpp index 202f5b1b43..4b8c2aa009 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.cpp +++ b/projects/rocprofiler/src/core/hsa/queues/queue.cpp @@ -35,6 +35,7 @@ #include "src/utils/helper.h" #include "src/core/isa_capture/code_object_track.hpp" + #define CHECK_HSA_STATUS(msg, status) \ do { \ if ((status) != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { \ @@ -74,37 +75,37 @@ static inline bool IsEventMatch(const hsa_ven_amd_aqlprofile_event_t& event1, typedef std::vector att_trace_callback_data_t; -static std::mutex ksymbol_map_lock; -static std::map* ksymbols; -static std::atomic ksymbols_flag{true}; + void AddKernelName(uint64_t handle, std::string name) { - std::lock_guard lock(ksymbol_map_lock); - ksymbols->emplace(handle, name); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.ksymbol_map_lock); + hsasupport_singleton.ksymbols->emplace(handle, name); } void RemoveKernelName(uint64_t handle) { - std::lock_guard lock(ksymbol_map_lock); - ksymbols->erase(handle); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.ksymbol_map_lock); + hsasupport_singleton.ksymbols->erase(handle); } std::string GetKernelNameFromKsymbols(uint64_t handle) { - std::lock_guard lock(ksymbol_map_lock); - if (ksymbols->find(handle) != ksymbols->end()) - return ksymbols->at(handle); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.ksymbol_map_lock); + if (hsasupport_singleton.ksymbols->find(handle) != hsasupport_singleton.ksymbols->end()) + return hsasupport_singleton.ksymbols->at(handle); else return "Unknown Kernel!"; } -static std::mutex kernel_names_map_lock; -static std::map>* kernel_names; -static std::atomic kernel_names_flag{true}; void AddKernelNameWithDispatchID(std::string name, uint64_t id) { - std::lock_guard lock(kernel_names_map_lock); - if (kernel_names->find(name) == kernel_names->end()) - kernel_names->emplace(name, std::vector()); - kernel_names->at(name).push_back(id); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.kernel_names_map_lock); + if (hsasupport_singleton.kernel_names->find(name) == hsasupport_singleton.kernel_names->end()) + hsasupport_singleton.kernel_names->emplace(name, std::vector()); + hsasupport_singleton.kernel_names->at(name).push_back(id); } std::string GetKernelNameUsingDispatchID(uint64_t given_id) { - std::lock_guard lock(kernel_names_map_lock); - for (auto kernel_name : (*kernel_names)) { + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.kernel_names_map_lock); + for (auto kernel_name : (*hsasupport_singleton.kernel_names)) { for (auto dispatch_id : kernel_name.second) { if (dispatch_id == given_id) return kernel_name.first; } @@ -112,34 +113,6 @@ std::string GetKernelNameUsingDispatchID(uint64_t given_id) { return "Unknown Kernel!"; } -void InitKsymbols() { - if (ksymbols_flag.load(std::memory_order_relaxed)) { - { - std::lock_guard lock(ksymbol_map_lock); - ksymbols = new std::map(); - ksymbols_flag.exchange(false, std::memory_order_release); - } - { - std::lock_guard lock(kernel_names_map_lock); - kernel_names = new std::map>(); - kernel_names_flag.exchange(false, std::memory_order_release); - } - } -} -void FinitKsymbols() { - if (!ksymbols_flag.load(std::memory_order_relaxed)) { - std::lock_guard lock(ksymbol_map_lock); - ksymbols->clear(); - delete ksymbols; - ksymbols_flag.exchange(true, std::memory_order_release); - } - if (!kernel_names_flag.load(std::memory_order_relaxed)) { - std::lock_guard lock(kernel_names_map_lock); - kernel_names->clear(); - delete kernel_names; - kernel_names_flag.exchange(true, std::memory_order_release); - } -} struct kernel_descriptor_t { @@ -185,7 +158,8 @@ enum amd_kernel_code_property_t { static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) { const kernel_descriptor_t* kernel_code = NULL; - hsa_status_t status = hsa_support::GetHSALoaderApi().hsa_ven_amd_loader_query_host_address( + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_status_t status = hsasupport_singleton.GetHSALoaderApi().hsa_ven_amd_loader_query_host_address( reinterpret_cast(kernel_object), reinterpret_cast(&kernel_code)); if (HSA_STATUS_SUCCESS != status) { kernel_code = reinterpret_cast(kernel_object); @@ -193,8 +167,8 @@ static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) { return kernel_code; } -static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) { - const std::string_view& name = info.getName(); +static uint32_t arch_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) { + std::string info_name(name.data(), name.size()); if (strcmp(name.data(), "gfx90a") == 0 || strncmp(name.data(), "gfx94", 5) == 0) return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc3, @@ -210,23 +184,23 @@ static uint32_t arch_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_ ? 8 : 4); } -static uint32_t accum_vgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) { - const std::string_view& name = info.getName(); +static uint32_t accum_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) { + std::string info_name(name.data(), name.size()); - if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(info, kernel_code); + if (strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(name, kernel_code); if (strcmp(info_name.c_str(), "gfx90a") == 0 || strncmp(info_name.c_str(), "gfx94", 5) == 0) return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) + 1) * 8 - - arch_vgpr_count(info, kernel_code); + arch_vgpr_count(name, kernel_code); return 0; } -static uint32_t sgpr_count(Agent::AgentInfo& info, const kernel_descriptor_t& kernel_code) { +static uint32_t sgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) { // GFX10 and later always allocate 128 sgprs. - const std::string_view name = info.getName(); + // TODO(srnagara): Recheck the extraction of gfxip from gpu name const char* name_data = name.data(); const size_t gfxip_label_len = std::min(name.size() - 2, size_t{63}); @@ -259,10 +233,10 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet kernel_properties_ptr.workgroup_size = (uint32_t)workgroup_size; kernel_properties_ptr.lds_size = packet.group_segment_size; kernel_properties_ptr.scratch_size = packet.private_segment_size; - Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(agent.handle); - kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info, *kernel_code); - kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info, *kernel_code); - kernel_properties_ptr.sgpr_count = sgpr_count(agent_info, *kernel_code); + HSAAgentInfo agent_info = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle); + kernel_properties_ptr.arch_vgpr_count = arch_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code); + kernel_properties_ptr.accum_vgpr_count = accum_vgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code); + kernel_properties_ptr.sgpr_count = sgpr_count(agent_info.GetDeviceInfo().getName(), *kernel_code); kernel_properties_ptr.wave_size = AMD_HSA_BITS_GET(kernel_code->kernel_code_properties, AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) @@ -275,9 +249,7 @@ rocprofiler_kernel_properties_t set_kernel_properties(hsa_kernel_dispatch_packet namespace queue { -using rocprofiler::GetROCProfilerSingleton; - -hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) { hsa_status_t status = HSA_STATUS_SUCCESS; pmc_callback_data_t* passed_data = reinterpret_cast(data); @@ -330,7 +302,7 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign rocprofiler_record_counter_value_t{value}}); } record->counters = counters; - rocprofiler::Session* session = GetROCProfilerSingleton()->GetSession(pending->session_id); + rocprofiler::Session* session = rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(pending->session_id); void* initial_handle = const_cast(record->counters); if (session->FindBuffer(pending->buffer_id)) { Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id); @@ -347,7 +319,8 @@ void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_sign void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent, att_pending_signal_t& pending) { - Agent::AgentInfo agent_info = hsa_support::GetAgentInfo(gpu_agent.handle); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + HSAAgentInfo agent_info = hsasupport_singleton.GetHSAAgentInfo(gpu_agent.handle); att_trace_callback_data_t data; hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(pending.profile, attTraceDataCallback, &data); @@ -373,11 +346,11 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent void* buffer = NULL; if (data_size != 0) { // Allocate buffer on CPU to copy out trace data - buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool); + buffer = Packet::AllocateSysMemory(gpu_agent, data_size, &agent_info.cpu_pool_); if (buffer == NULL) fatal("Trace data buffer allocation failed"); - auto status = rocprofiler::hsa_support::GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, - data_size); + auto status = + hsasupport_singleton.GetCoreApiTable().hsa_memory_copy_fn(buffer, data_ptr, data_size); if (status != HSA_STATUS_SUCCESS) fatal("Trace data memcopy to host failed"); record->shader_engine_data[se_index].buffer_ptr = buffer; @@ -392,12 +365,13 @@ void AddAttRecord(rocprofiler_record_att_tracer_t* record, hsa_agent_t gpu_agent bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { auto queue_info_session = static_cast(data); - if (!queue_info_session || !GetROCProfilerSingleton() || - !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id) || - !GetROCProfilerSingleton()->GetSession(queue_info_session->session_id)->GetProfiler()) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + if (!queue_info_session || + !rocprofiler_singleton.GetSession(queue_info_session->session_id) || + !rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetProfiler()) return true; - rocprofiler::Session* session = - GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); + rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id); std::lock_guard lock(session->GetSessionLock()); rocprofiler::profiler::Profiler* profiler = session->GetProfiler(); std::vector pending_signals = const_cast&>( @@ -407,10 +381,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { for (auto it = pending_signals.begin(); it != pending_signals.end(); it = pending_signals.erase(it)) { auto& pending = *it; - if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) - return true; + if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending->new_signal)) return true; hsa_amd_profiling_dispatch_time_t time; - hsa_support::GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( + hsasupport_singleton.GetAmdExtTable().hsa_amd_profiling_get_dispatch_time_fn( queue_info_session->agent, pending->original_signal, &time); uint32_t record_count = 1; bool is_individual_xcc_mode = false; @@ -440,7 +413,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { record.correlation_id = rocprofiler_correlation_id_t{pending->correlation_id}; if (pending->session_id.handle == 0) { - pending->session_id = GetROCProfilerSingleton()->GetCurrentSessionId(); + pending->session_id = rocprofiler_singleton.GetCurrentSessionId(); } if (pending->counters_count > 0) { if (xcc_id == 0 && pending->context && pending->context->metrics_list.size() > 0 && @@ -456,7 +429,7 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { pending->context->metrics_list, time.end - time.start); AddRecordCounters(&record, pending); - } else { + }else { if (session->FindBuffer(pending->buffer_id)) { Memory::GenericBuffer* buffer = session->GetBuffer(pending->buffer_id); buffer->AddRecord(record); @@ -467,13 +440,12 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { // TODO(aelwazir): we need a better way of distributing events and free them // if (pending->profile->output_buffer.ptr) // numa_free(pending->profile->output_buffer.ptr, pending->profile->output_buffer.size); - hsa_status_t status = - rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( - (pending->profile->output_buffer.ptr)); + hsa_status_t status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn( + (pending->profile->output_buffer.ptr)); CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status); // if (pending->profile->command_buffer.ptr) // numa_free(pending->profile->command_buffer.ptr, pending->profile->command_buffer.size); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( + status =hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn( (pending->profile->command_buffer.ptr)); CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status); delete pending->profile; @@ -483,9 +455,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { delete pending->context; } if (pending->new_signal.handle) - hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal); + hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(pending->new_signal); if (queue_info_session->interrupt_signal.handle) - hsa_support::GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal); + hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn(queue_info_session->interrupt_signal); } } delete queue_info_session; @@ -496,15 +468,13 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) { bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { auto queue_info_session = static_cast(data); - if (!queue_info_session || !GetROCProfilerSingleton()) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + if (!queue_info_session || + !rocprofiler_singleton.GetSession(queue_info_session->session_id) || + !rocprofiler_singleton.GetSession(queue_info_session->session_id)->GetAttTracer()) return true; - - rocprofiler::Session* session = - GetROCProfilerSingleton()->GetSession(queue_info_session->session_id); - if (!session) return true; - - std::lock_guard lock(session->GetSessionLock()); - + rocprofiler::Session* session = rocprofiler_singleton.GetSession(queue_info_session->session_id); rocprofiler::att::AttTracer* att_tracer = session->GetAttTracer(); if (!session->GetAttTracer()) return true; @@ -516,9 +486,8 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { for (auto it = pending_signals.begin(); it != pending_signals.end(); it = pending_signals.erase(it)) { auto& pending = *it; - if (hsa_support::GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) - return true; - + std::lock_guard lock(session->GetSessionLock()); + if (hsasupport_singleton.GetCoreApiTable().hsa_signal_load_relaxed_fn(pending.new_signal)) return true; rocprofiler_record_att_tracer_t record{}; record.kernel_id = rocprofiler_kernel_id_t{pending.kernel_descriptor}; record.gpu_id = rocprofiler_agent_id_t{(uint64_t)queue_info_session->gpu_index}; @@ -540,7 +509,7 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { std::atomic_thread_fence(std::memory_order_release); if (pending.session_id.handle == 0) { - pending.session_id = GetROCProfilerSingleton()->GetCurrentSessionId(); + pending.session_id = rocprofiler_singleton.GetCurrentSessionId(); } if (session->FindBuffer(pending.buffer_id)) { Memory::GenericBuffer* buffer = session->GetBuffer(pending.buffer_id); @@ -549,10 +518,10 @@ bool AsyncSignalHandlerATT(hsa_signal_value_t /* signal */, void* data) { } codeobj_record::free_capture(record.header.id); - hsa_status_t status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( + hsa_status_t status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn( (pending.profile->output_buffer.ptr)); CHECK_HSA_STATUS("Error: Couldn't free output buffer memory", status); - status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_pool_free_fn( + status = hsasupport_singleton.GetAmdExtTable().hsa_amd_memory_pool_free_fn( (pending.profile->command_buffer.ptr)); CHECK_HSA_STATUS("Error: Couldn't free command buffer memory", status); delete pending.profile; @@ -580,20 +549,20 @@ void AddVendorSpecificPacket(const Packet::packet_t* packet, } void SignalAsyncHandler(const hsa_signal_t& signal, void* data) { - hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn( + hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn( signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandler, data); CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler failed", status); } void signalAsyncHandlerATT(const hsa_signal_t& signal, void* data) { - hsa_status_t status = hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn( + hsa_status_t status = HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_async_handler_fn( signal, HSA_SIGNAL_CONDITION_EQ, 0, AsyncSignalHandlerATT, data); CHECK_HSA_STATUS("Error: hsa_amd_signal_async_handler for ATT failed", status); } void CreateSignal(uint32_t attribute, hsa_signal_t* signal) { hsa_status_t status = - hsa_support::GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal); + HSASupport_Singleton::GetInstance().GetAmdExtTable().hsa_amd_signal_create_fn(1, 0, nullptr, attribute, signal); CHECK_HSA_STATUS("Error: hsa_amd_signal_create failed", status); } @@ -635,17 +604,16 @@ void ResetSessionID(rocprofiler_session_id_t id) { session_id = id; } void CheckNeededProfileConfigs() { rocprofiler_session_id_t internal_session_id; - if (GetROCProfilerSingleton()) // Getting Session ID - internal_session_id = GetROCProfilerSingleton()->GetCurrentSessionId(); - else - internal_session_id = {0}; + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + internal_session_id = rocprofiler_singleton.GetCurrentSessionId(); + if (session_id.handle == 0 || internal_session_id.handle != session_id.handle) { session_id = internal_session_id; // Getting Counters count from the Session - if (session_id.handle > 0 && GetROCProfilerSingleton()) { - session = GetROCProfilerSingleton()->GetSession(session_id); + if (session_id.handle > 0 ) { + session = rocprofiler_singleton.GetSession(session_id); if (session && session->FindFilterWithKind(ROCPROFILER_COUNTERS_COLLECTION)) { rocprofiler_filter_id_t filter_id = session->GetFilterIdWithKind(ROCPROFILER_COUNTERS_COLLECTION); @@ -690,9 +658,9 @@ std::pair, bool> GetAllowedProfilesList(const void* packets, i std::vector can_profile_packet; bool b_can_profile_anypacket = false; can_profile_packet.reserve(pkt_count); - - std::lock_guard lock(ksymbol_map_lock); - assert(ksymbols); + rocprofiler::HSASupport_Singleton& hsasupport_singleton = rocprofiler::HSASupport_Singleton::GetInstance(); + std::lock_guard lock(hsasupport_singleton.ksymbol_map_lock); + assert(hsasupport_singleton.ksymbols); uint32_t current_writer_id = WRITER_ID.load(std::memory_order_relaxed); @@ -710,7 +678,7 @@ std::pair, bool> GetAllowedProfilesList(const void* packets, i for (auto id : kernel_profile_dispatch_ids) b_profile_this_object |= id == current_writer_id; try { // Can throw - const std::string& kernel_name = ksymbols->at(kdispatch.kernel_object); + const std::string& kernel_name = hsasupport_singleton.ksymbols->at(kdispatch.kernel_object); // If no filters specified, auto profile this kernel if (kernel_profile_names.size() == 0 && kernel_profile_dispatch_ids.size() == 0 && @@ -739,7 +707,7 @@ ProcessATTParams( Packet::packet_t& start_packet, Packet::packet_t& stop_packet, Queue& queue_info, - Agent::AgentInfo& agentInfo + rocprofiler::HSAAgentInfo& agentInfo ) { std::vector att_params; int num_att_counters = 0; @@ -805,7 +773,7 @@ ProcessATTParams( * pointer to the packet. This packet is written into the queue by this * interceptor by invoking the writer function. */ -void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data, +void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, void* data, hsa_amd_queue_intercept_packet_writer writer) { static const char* env_MAX_ATT_PROFILES = getenv("ROCPROFILER_MAX_ATT_PROFILES"); @@ -882,7 +850,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt rocprofiler_kernel_properties_t kernel_properties = set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent()); if (session) { - uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId(); + uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId(); AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object), record_id); if (session_data_count > 0 && profile.second) { @@ -936,19 +904,21 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt (reinterpret_cast(&barrier)); transformed_packets.emplace_back(*pkt); } - Agent::AgentInfo& agentInfo = - rocprofiler::hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle); + rocprofiler::HSAAgentInfo& agentInfo = + rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle); // Creating Async Handler to be called every time the interrupt signal is // marked complete SignalAsyncHandler( interrupt_signal, - new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, - queue_info.GetQueueID(), writer_id, interrupt_signal, - agentInfo.getIndex(), agentInfo.getXccCount()}); + new queue_info_session_t{queue_info.GetGPUAgent(), session_id_snapshot, queue_info.GetQueueID(), + writer_id, interrupt_signal, agentInfo.GetDeviceInfo().getGPUId(), + agentInfo.GetDeviceInfo().getXccCount()}); ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_add(1, std::memory_order_relaxed); // profile_id++; // } while (replay_mode_count > 0 && profile_id < replay_mode_count); // Profiles loop end + } + /* Write the transformed packets to the hardware queue. */ writer(&transformed_packets[0], transformed_packets.size()); } else if (session_id_snapshot.handle > 0 && pkt_count > 0 && is_att_collection_mode && session && @@ -957,7 +927,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // Getting Queue Data and Information auto& queue_info = *static_cast(data); std::lock_guard lk(queue_info.qw_mutex); - Agent::AgentInfo agentInfo = hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle); + rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance().GetHSAAgentInfo(queue_info.GetGPUAgent().handle); bool can_profile_anypacket = false; std::vector can_profile_packet; @@ -1021,7 +991,7 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt // list to be processed by the signal interrupt rocprofiler_kernel_properties_t kernel_properties = set_kernel_properties(dispatch_packet, queue_info.GetGPUAgent()); - uint64_t record_id = GetROCProfilerSingleton()->GetUniqueRecordId(); + uint64_t record_id = rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId(); AddKernelNameWithDispatchID(GetKernelNameFromKsymbols(dispatch_packet.kernel_object), record_id); @@ -1080,31 +1050,13 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt /* Write the original packets to the hardware queue if no profiling session * is active */ writer(packets, pkt_count); + } } -Queue::Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data, - uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue) - : cpu_agent_(cpu_agent), gpu_agent_(gpu_agent) { - [[maybe_unused]] hsa_status_t status = - hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_create_fn( - gpu_agent, size, type, callback, data, private_segment_size, group_segment_size, - &intercept_queue_); - assert(status == HSA_STATUS_SUCCESS); - - status = hsa_support::GetAmdExtTable().hsa_amd_profiling_set_profiler_enabled_fn(intercept_queue_, - true); - assert(status == HSA_STATUS_SUCCESS); - - hsa_support::GetAmdExtTable().hsa_amd_queue_intercept_register_fn(intercept_queue_, - WriteInterceptor, this); - assert(status == HSA_STATUS_SUCCESS); - - *queue = intercept_queue_; -} +Queue::Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent, hsa_queue_t* queue) + : cpu_agent_(cpu_agent), gpu_agent_(gpu_agent), intercept_queue_(queue) { } Queue::~Queue() { while (ACTIVE_INTERRUPT_SIGNAL_COUNT.load(std::memory_order_acquire) > 0) { @@ -1119,15 +1071,8 @@ hsa_agent_t Queue::GetCPUAgent() { return cpu_agent_; } uint64_t Queue::GetQueueID() { return intercept_queue_->id; } -void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info) { - Packet::InitializePools(cpu_agent, agent_info); -} -void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info) { - Packet::InitializeGPUPool(gpu_agent, agent_info); -} -void CheckPacketReqiurements(std::vector& gpu_agents) { - Packet::CheckPacketReqiurements(gpu_agents); -} +void CheckPacketReqiurements() { + Packet::CheckPacketReqiurements();} } // namespace queue } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.h b/projects/rocprofiler/src/core/hsa/queues/queue.h index b4f8b96eb8..11e1111d64 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.h +++ b/projects/rocprofiler/src/core/hsa/queues/queue.h @@ -39,8 +39,7 @@ namespace rocprofiler { -void InitKsymbols(); -void FinitKsymbols(); + void AddKernelName(uint64_t handle, std::string kernel_name); void RemoveKernelName(uint64_t handle); void AddKernelNameWithDispatchID(std::string name, uint64_t id); @@ -52,12 +51,12 @@ namespace queue { class Queue { public: - Queue(const hsa_agent_t& cpu_agent, const hsa_agent_t& gpu_agent, uint32_t size, - hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data, - uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue); + Queue(const hsa_agent_t cpu_agent, const hsa_agent_t gpu_agent, + hsa_queue_t* queue); ~Queue(); + static void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt_index, + void* data, hsa_amd_queue_intercept_packet_writer writer); hsa_queue_t* GetCurrentInterceptQueue(); hsa_agent_t GetGPUAgent(); hsa_agent_t GetCPUAgent(); @@ -69,7 +68,6 @@ class Queue { std::mutex mutex_; hsa_agent_t cpu_agent_; hsa_agent_t gpu_agent_; - hsa_queue_t* original_queue_; hsa_queue_t* intercept_queue_; hsa_status_t pmcCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, @@ -88,12 +86,10 @@ struct queue_info_session_t { void AddRecordCounters(rocprofiler_record_profiler_t* record, const pending_signal_t& pending); -void InitializePools(hsa_agent_t cpu_agent, Agent::AgentInfo* agent_info); -void InitializeGPUPool(hsa_agent_t gpu_agent, Agent::AgentInfo* agent_info); -void CheckPacketReqiurements(std::vector& gpu_agents); - void ResetSessionID(rocprofiler_session_id_t id = rocprofiler_session_id_t{0}); +void CheckPacketReqiurements(); + } // namespace queue } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/isa_capture/code_object_track.cpp b/projects/rocprofiler/src/core/isa_capture/code_object_track.cpp index 7ffe2b0846..8357b5bfbb 100644 --- a/projects/rocprofiler/src/core/isa_capture/code_object_track.cpp +++ b/projects/rocprofiler/src/core/isa_capture/code_object_track.cpp @@ -77,7 +77,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6 uint64_t mem_size) { std::lock_guard lock(mutex); codeobjs[addr] = std::make_shared( - addr, URI, mem_addr, mem_size, rocprofiler::GetCurrentTimestamp().value); + addr, URI, mem_addr, mem_size, rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value); std::atomic_thread_fence(std::memory_order_release); // Fencing the state of the map { std::lock_guard lock(codeobj_record::mutex); @@ -87,7 +87,7 @@ void codeobj_capture_instance::Load(uint64_t addr, const std::string& URI, uint6 void codeobj_capture_instance::Unload(uint64_t addr) { std::lock_guard lock(mutex); - codeobjs.at(addr)->end_time = rocprofiler::GetCurrentTimestamp().value; + codeobjs.at(addr)->end_time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value; codeobjs.erase(addr); } diff --git a/projects/rocprofiler/src/core/memory/generic_buffer.cpp b/projects/rocprofiler/src/core/memory/generic_buffer.cpp index 445335fdf2..fdb5b11bb3 100644 --- a/projects/rocprofiler/src/core/memory/generic_buffer.cpp +++ b/projects/rocprofiler/src/core/memory/generic_buffer.cpp @@ -69,8 +69,9 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu GenericBuffer::~GenericBuffer() { if (is_valid_.load(std::memory_order_acquire)) { std::lock_guard lock(buffer_lock_); - // if (rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)) - // rocprofiler::GetROCProfiler_Singleton()->GetSession(session_id_)->DisableTools(id_); + //rocprofiler::ROCProfiler_Singleton& instance = rocprofiler::ROCProfiler_Singleton::GetInstance(); + //if (instance.GetSession(session_id_)) + // instance.GetSession(session_id_)->DisableTools(id_); Flush(); diff --git a/projects/rocprofiler/src/core/session/counters_sampler.cpp b/projects/rocprofiler/src/core/session/counters_sampler.cpp index 9b726847b4..870662a009 100644 --- a/projects/rocprofiler/src/core/session/counters_sampler.cpp +++ b/projects/rocprofiler/src/core/session/counters_sampler.cpp @@ -35,17 +35,17 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id, pci_system_initialized_(pci_system_init() == 0) { - params_ = rocprofiler::GetROCProfilerSingleton() - ->GetSession(session_id_) + params_ = rocprofiler::ROCProfiler_Singleton::GetInstance() + .GetSession(session_id_) ->GetFilter(filter_id_) ->GetCountersSamplerParameterData(); std::vector agents; - rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn( + HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_iterate_agents_fn( [](hsa_agent_t agent, void* arg) { auto& agents = *reinterpret_cast*>(arg); - const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle); - if (ai.getType() == HSA_DEVICE_TYPE_GPU) { + const auto& ai = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agent.handle); + if (ai.GetType() == HSA_DEVICE_TYPE_GPU) { agents.emplace_back(agent); } return HSA_STATUS_SUCCESS; @@ -62,8 +62,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id, } if (pcie_counter_names.size() > 0) { - auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle); - if (agentInfo.getName() == "gfx90a") { + auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle); + if (agentInfo.GetDeviceInfo().getName()== "gfx90a") { PciePerfMonMI200* perfmon = new PciePerfMonMI200(agentInfo); perfmon->SetCounterNames(pcie_counter_names); perfmon_instances_.push_back(perfmon); @@ -77,8 +77,8 @@ CountersSampler::CountersSampler(rocprofiler_buffer_id_t buffer_id, } if (xgmi_counter_names.size() > 0) { - auto agentInfo = rocprofiler::hsa_support::GetAgentInfo(agents[params_.gpu_agent_index].handle); - if (agentInfo.getName() == "gfx90a") { + auto agentInfo = HSASupport_Singleton::GetInstance().GetHSAAgentInfo(agents[params_.gpu_agent_index].handle); + if (agentInfo.GetDeviceInfo().getName() == "gfx90a") { DFPerfMonMI200* perfmon = new DFPerfMonMI200(agentInfo); perfmon->SetCounterNames(xgmi_counter_names); perfmon_instances_.push_back(perfmon); @@ -132,13 +132,13 @@ void CountersSampler::Stop() { } void CountersSampler::AddRecord(rocprofiler_record_counters_sampler_t& record) { - const auto tool = rocprofiler::GetROCProfilerSingleton(); - const auto session = tool->GetSession(session_id_); + rocprofiler::ROCProfiler_Singleton& tool = rocprofiler::ROCProfiler_Singleton::GetInstance(); + const auto session = tool.GetSession(session_id_); const auto buffer = session->GetBuffer(buffer_id_); std::lock_guard lk(session->GetSessionLock()); - record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool->GetUniqueRecordId()}}; + record.header = {ROCPROFILER_COUNTERS_SAMPLER_RECORD, {tool.GetUniqueRecordId()}}; // Add the record to the buffer(a deep-copy operation) along with // a lambda function to deep-copy the record.counters member to diff --git a/projects/rocprofiler/src/core/session/device_profiling.cpp b/projects/rocprofiler/src/core/session/device_profiling.cpp index 9f28d7af2f..5f7855b8c5 100644 --- a/projects/rocprofiler/src/core/session/device_profiling.cpp +++ b/projects/rocprofiler/src/core/session/device_profiling.cpp @@ -9,7 +9,7 @@ #include "src/utils/exception.h" #include "src/core/hsa/queues/queue.h" // #include "src/core/counters/rdc/rdc_metrics.h" -#include "src/core/hsa/hsa_common.h" + #include #include @@ -183,8 +183,8 @@ DeviceProfileSession::DeviceProfileSession(std::vector profiling_da if (hsa_agent_get_info(gpu_agent_, HSA_AGENT_INFO_NAME, gpu_name) != HSA_STATUS_SUCCESS) fatal("Agent name query failed"); - Agent::AgentInfo* agentInfo = &(hsa_support::GetAgentInfo(gpu_agent_.handle)); - metrics_dict_ = MetricsDict::Create(agentInfo); + HSAAgentInfo agentInfo = (HSASupport_Singleton::GetInstance().GetHSAAgentInfo(gpu_agent_.handle)); + metrics_dict_ = MetricsDict::Create(&agentInfo); for (auto& d : profiling_data_) { Metric* metric = const_cast(metrics_dict_->Get(d)); diff --git a/projects/rocprofiler/src/core/session/spm/spm.cpp b/projects/rocprofiler/src/core/session/spm/spm.cpp index b8edd53a5e..32482a3f6d 100644 --- a/projects/rocprofiler/src/core/session/spm/spm.cpp +++ b/projects/rocprofiler/src/core/session/spm/spm.cpp @@ -157,7 +157,7 @@ std::mutex processQueueLock; // std::vector timestamp_vec; // // Get Buffer // rocprofiler::Session* session = -// rocprofiler::GetROCProfilerSingleton()->GetSession(rocprofiler::GetROCProfilerSingleton()->GetCurrentSessionId()); +// rocprofiler::ROCProfiler_Singleton::GetInstance().GetSession(rocprofiler::rocmtool::GetInstance().GetCurrentSessionId()); // rocprofiler_filter_id_t filter_id = session->GetFilterIdWithKind(ROCPROFILER_SPM_COLLECTION); // rocprofiler::Filter* filter = session->GetFilter(filter_id); // rocprofiler_buffer_id_t buffer_id = filter->GetBufferId(); @@ -188,8 +188,7 @@ std::mutex processQueueLock; // } // se++; // } -// record.header.id = -// rocprofiler_record_id_t{rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}; +// record.header.id = rocprofiler_record_id_t{rocprofiler::ROCProfiler_Singleton::GetInstance().GetUniqueRecordId()}; // buffer->AddRecord(record); // nSample++; // index += 160; @@ -241,11 +240,10 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) { // advance command queue const uint64_t write_idx = - rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, - 1); + rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_add_write_index_scacq_screl_fn(queue, 1); while ((write_idx - - rocprofiler::hsa_support::GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn( - queue)) >= queue->size) { + rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_load_read_index_relaxed_fn(queue)) >= + queue->size) { sched_yield(); // TODO: remove } @@ -263,8 +261,8 @@ uint64_t submitPacket(hsa_queue_t* queue, const void* packet) { header_atomic_ptr->store(slot_data[0], std::memory_order_release); // ringdoor bell - rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal, - write_idx); + rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_store_relaxed_fn(queue->doorbell_signal, + write_idx); return write_idx; } @@ -290,7 +288,7 @@ hsa_signal_value_t signalWait(const hsa_signal_t& signal, const hsa_signal_value // Probably a maximum wait time should be set. We don't want application to hang because of // unlimited wait. // TODO2 : try 500000 assuming nanosecond granularity -- must be verified. - ret_value = rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_wait_scacquire_fn( + ret_value = rocprofiler::HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_wait_scacquire_fn( signal, HSA_SIGNAL_CONDITION_LT, signal_value, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); if (ret_value == exp_value) break; @@ -321,9 +319,9 @@ spm::SpmCounters::SpmCounters(rocprofiler_buffer_id_t buffer_id, rocprofiler_fil // create signals hsa_status_t status = - hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_); + HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &start_signal_); if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed"); - status = hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_); + status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL, &stop_signal_); if (status != HSA_STATUS_SUCCESS) fatal("start signal creation failed"); is_started.store(false, std::memory_order_relaxed); buffer_read_flag.store(false, std::memory_order_relaxed); @@ -417,7 +415,7 @@ rocprofiler_status_t spm::SpmCounters::stopSpm() { hsa_signal_store_screlease(stop_signal_, 1); hsa_status_t status = HSA_STATUS_SUCCESS; if (queue_ != nullptr) { - status = hsa_support::GetCoreApiTable().hsa_queue_destroy_fn(queue_); + status = HSASupport_Singleton::GetInstance().GetCoreApiTable().hsa_queue_destroy_fn(queue_); queue_ = nullptr; } if (status != HSA_STATUS_SUCCESS) rocprofiler::warning("Queue destroy failed"); diff --git a/projects/rocprofiler/src/core/session/tracer/src/roctracer.cpp b/projects/rocprofiler/src/core/session/tracer/src/roctracer.cpp index 888bebc29d..363108deda 100644 --- a/projects/rocprofiler/src/core/session/tracer/src/roctracer.cpp +++ b/projects/rocprofiler/src/core/session/tracer/src/roctracer.cpp @@ -237,34 +237,33 @@ template struct ApiTracer { }; static void Exit(OperationId operation_id, TraceData* trace_data) { - if (rocprofiler::GetROCProfilerSingleton()) { - if (auto pool = activity_table.Get(operation_id)) { - if (rocprofiler::GetROCProfilerSingleton() && - rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) && - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = + rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (auto pool = activity_table.Get(operation_id)) { + if (rocprofiler_singleton.GetSession((*pool)->session_id) && + rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id)) { - if (rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + if (rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->IsValid()) { - std::lock_guard lock(rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + std::lock_guard lock(rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->GetBufferLock()); assert(trace_data != nullptr); rocprofiler_record_tracer_t record{}; record.header = rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}; + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}; record.domain = domain; record.operation_id = rocprofiler_tracer_operation_id_t{operation_id}; record.correlation_id = rocprofiler_tracer_activity_correlation_id_t{trace_data->api_data.correlation_id}; record.timestamps = rocprofiler_record_header_timestamp_t{ rocprofiler_timestamp_t{trace_data->phase_enter_timestamp}, - hsa_support::timestamp_ns()}; + rocprofiler_singleton.timestamp_ns()}; record.thread_id = rocprofiler_thread_id_t{GetTid()}; record.phase = ROCPROFILER_PHASE_NONE; @@ -272,8 +271,7 @@ template struct ApiTracer { rocprofiler_record_tracer_t ext_record{}; ext_record.header = rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}; + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}; ext_record.domain = ACTIVITY_DOMAIN_EXT_API; ext_record.operation_id = rocprofiler_tracer_operation_id_t{ACTIVITY_EXT_OP_EXTERN_ID}; @@ -283,21 +281,20 @@ template struct ApiTracer { ext_record.phase = ROCPROFILER_PHASE_NONE; // Write the external correlation id record directly followed by the // activity record. - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->AddRecord(std::array{ext_record, record}); } else { // Write record to the buffer. - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->AddRecord(record); } } } } - } CorrelationIdPop(); } @@ -332,7 +329,7 @@ template struct ApiTracer { trace_data->api_data.correlation_id = CorrelationIdPush(); if (activity_enabled) { - trace_data->phase_enter_timestamp = hsa_support::timestamp_ns().value; + trace_data->phase_enter_timestamp = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value; trace_data->phase_enter = nullptr; trace_data->phase_exit = Exit; } @@ -365,6 +362,7 @@ ActivityRegistrationTable hsa_ops_activity_t CallbackRegistrationTable hsa_evt_callback_table; int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) { + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); switch (domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_ApiTracer::Enter(static_cast(operation_id), @@ -380,50 +378,47 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) // If the record is for a kernel dispatch, write the kernel name in the pool's data, // and make the record point to it. Older HIP runtimes do not provide a kernel name, // so record.kernel_name might be null. - if (!rocprofiler::GetROCProfilerSingleton()) return 0; - if (rocprofiler::GetROCProfilerSingleton() && - rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) && - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) - ->GetBuffer((*pool)->buffer_id)) { - std::lock_guard lock(rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) - ->GetBuffer((*pool)->buffer_id) - ->GetBufferLock()); - rocprofiler_record_tracer_t rocprofiler_record{}; - rocprofiler_record.header = rocprofiler_record_header_t{ - ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}; - rocprofiler_record.domain = domain; - rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{}; - rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind}; - rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{}; - rocprofiler_record.correlation_id = - rocprofiler_tracer_activity_correlation_id_t{record->correlation_id}; - rocprofiler_record.timestamps = rocprofiler_record_header_timestamp_t{ - rocprofiler_timestamp_t{record->begin_ns}, rocprofiler_timestamp_t{record->end_ns}}; - rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id}; - rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id}; - rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()}; - rocprofiler_record.phase = ROCPROFILER_PHASE_NONE; - if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) { - rocprofiler_record.name = record->kernel_name; - size_t kernel_name_size = (strlen(record->kernel_name) + 1); - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) - ->GetBuffer((*pool)->buffer_id) - ->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size, - [](auto& rocprofiler_record, const void* data) { - rocprofiler_record.name = static_cast(data); - }); - } else { - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) - ->GetBuffer((*pool)->buffer_id) - ->AddRecord(rocprofiler_record); + if (rocprofiler_singleton.GetSession((*pool)->session_id) && + rocprofiler_singleton.GetSession((*pool)->session_id) + ->GetBuffer((*pool)->buffer_id)) { + std::lock_guard lock( + rocprofiler_singleton.GetSession((*pool)->session_id) + ->GetBuffer((*pool)->buffer_id) + ->GetBufferLock()); + + rocprofiler_record_tracer_t rocprofiler_record{}; + rocprofiler_record.header = rocprofiler_record_header_t{ + ROCPROFILER_TRACER_RECORD, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}; + rocprofiler_record.domain = domain; + rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{}; + rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->kind}; + rocprofiler_record.api_data = rocprofiler_tracer_api_data_t{}; + rocprofiler_record.correlation_id = + rocprofiler_tracer_activity_correlation_id_t{record->correlation_id}; + rocprofiler_record.timestamps = + rocprofiler_record_header_timestamp_t{rocprofiler_timestamp_t{record->begin_ns}, + rocprofiler_timestamp_t{record->end_ns}}; + rocprofiler_record.agent_id = rocprofiler_agent_id_t{(uint64_t)record->device_id}; + rocprofiler_record.queue_id = rocprofiler_queue_id_t{record->queue_id}; + rocprofiler_record.thread_id = rocprofiler_thread_id_t{GetTid()}; + rocprofiler_record.phase = ROCPROFILER_PHASE_NONE; + if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr) { + rocprofiler_record.name = record->kernel_name; + size_t kernel_name_size = (strlen(record->kernel_name) + 1); + + rocprofiler_singleton.GetSession((*pool)->session_id) + ->GetBuffer((*pool)->buffer_id) + ->AddRecord(rocprofiler_record, rocprofiler_record.name, kernel_name_size, + [](auto& rocprofiler_record, const void* data) { + rocprofiler_record.name = static_cast(data); + }); + } else { + rocprofiler_singleton.GetSession((*pool)->session_id) + ->GetBuffer((*pool)->buffer_id) + ->AddRecord(rocprofiler_record); + } } - } } return 0; } @@ -438,19 +433,18 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) user_callback->second); return 0; } else { - if (!rocprofiler::GetROCProfilerSingleton()) return 0; - if (rocprofiler::GetROCProfilerSingleton() && - rocprofiler::GetROCProfilerSingleton()->GetSession( + + if ( + rocprofiler_singleton.GetSession( reinterpret_cast(user_callback->second)->session_id) && - rocprofiler::GetROCProfilerSingleton() - ->GetSession( + rocprofiler_singleton.GetSession( reinterpret_cast(user_callback->second)->session_id) ->GetBuffer( reinterpret_cast(user_callback->second)->buffer_id)) { if (auto api_data = static_cast::ApiData*>(data)) { std::lock_guard lock( - rocprofiler::GetROCProfilerSingleton() - ->GetSession( + rocprofiler_singleton. + GetSession( reinterpret_cast(user_callback->second)->session_id) ->GetBuffer( reinterpret_cast(user_callback->second)->buffer_id) @@ -460,13 +454,13 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{api_data ? api_data->args.id : 0}, ACTIVITY_DOMAIN_ROCTX, rocprofiler_tracer_operation_id_t{operation_id}, tracer_api_data, rocprofiler_tracer_activity_correlation_id_t{0}, - rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(), + rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(), rocprofiler_timestamp_t{0}}, 0, 0, @@ -477,8 +471,8 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) if (api_data && api_data->args.message) { message_size = strlen(api_data->args.message) + 1; } - rocprofiler::GetROCProfilerSingleton() - ->GetSession( + rocprofiler_singleton + .GetSession( reinterpret_cast(user_callback->second)->session_id) ->GetBuffer( reinterpret_cast(user_callback->second)->buffer_id) @@ -496,21 +490,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) case ACTIVITY_DOMAIN_HSA_OPS: if (auto pool = hsa_ops_activity_table.Get(operation_id)) { if (auto record = static_cast(data)) { - if (!rocprofiler::GetROCProfilerSingleton()) return 0; - if (rocprofiler::GetROCProfilerSingleton() && - rocprofiler::GetROCProfilerSingleton()->GetSession((*pool)->session_id) && - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + if (rocprofiler_singleton.GetSession((*pool)->session_id) && + rocprofiler_singleton.GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id)) { - std::lock_guard lock(rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + std::lock_guard lock(rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->GetBufferLock()); rocprofiler_record_tracer_t rocprofiler_record{}; rocprofiler_record.header = rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}; + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}; rocprofiler_record.domain = domain; rocprofiler_record.external_id = rocprofiler_tracer_external_id_t{0}; rocprofiler_record.operation_id = rocprofiler_tracer_operation_id_t{record->op}; @@ -525,16 +515,17 @@ int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) rocprofiler_record.phase = ROCPROFILER_PHASE_NONE; if (record->kernel_name != nullptr && record->op == HSA_OP_ID_DISPATCH) { size_t kernel_name_size = strlen(record->kernel_name) + 1; - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + + rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->AddRecord(rocprofiler_record, record->kernel_name, kernel_name_size, [](auto& rocprofiler_record, const void* data) { rocprofiler_record.name = static_cast(data); }); } else { - rocprofiler::GetROCProfilerSingleton() - ->GetSession((*pool)->session_id) + rocprofiler_singleton + .GetSession((*pool)->session_id) ->GetBuffer((*pool)->buffer_id) ->AddRecord(rocprofiler_record); } diff --git a/projects/rocprofiler/src/core/session/tracer/tracer.cpp b/projects/rocprofiler/src/core/session/tracer/tracer.cpp index ce723dfe7f..1e49326c28 100644 --- a/projects/rocprofiler/src/core/session/tracer/tracer.cpp +++ b/projects/rocprofiler/src/core/session/tracer/tracer.cpp @@ -116,9 +116,10 @@ std::mutex& Tracer::GetTracerLock() { return tracer_lock_; } void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_data, void* args) { api_callback_data_t* args_data = reinterpret_cast(args); rocprofiler_tracer_api_data_t api_data{}; - if (args_data && rocprofiler::GetROCProfilerSingleton() && - rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id) && - rocprofiler::GetROCProfilerSingleton()->GetSession(args_data->session_id)->GetTracer()) { + rocprofiler::ROCProfiler_Singleton& rocprofiler_singleton = rocprofiler::ROCProfiler_Singleton::GetInstance(); + if (args_data && + rocprofiler_singleton.GetSession(args_data->session_id) && + rocprofiler_singleton.GetSession(args_data->session_id)->GetTracer()) { switch (domain) { case ACTIVITY_DOMAIN_ROCTX: { const roctx_api_data_t* data = reinterpret_cast(callback_data); @@ -127,12 +128,12 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d rocprofiler_record_tracer_t{ rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{data ? data->args.id : 0}, ACTIVITY_DOMAIN_ROCTX, rocprofiler_tracer_operation_id_t{cid}, api_data, + rocprofiler_tracer_activity_correlation_id_t{0}, - rocprofiler_record_header_timestamp_t{roctracer::hsa_support::timestamp_ns(), + rocprofiler_record_header_timestamp_t{rocprofiler_singleton.timestamp_ns(), rocprofiler_timestamp_t{0}}, 0, 0, GetTid(), ROCPROFILER_PHASE_ENTER}, args_data->session_id); @@ -147,8 +148,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d rocprofiler_record_tracer_t{ rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API, rocprofiler_tracer_operation_id_t{cid}, api_data, rocprofiler_tracer_activity_correlation_id_t{data->correlation_id}, @@ -161,8 +161,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d rocprofiler_record_tracer_t{ rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HSA_API, rocprofiler_tracer_operation_id_t{cid}, api_data, rocprofiler_tracer_activity_correlation_id_t{data->correlation_id}, @@ -182,8 +181,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d rocprofiler_record_tracer_t{ rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API, rocprofiler_tracer_operation_id_t{cid}, api_data, rocprofiler_tracer_activity_correlation_id_t{data->correlation_id}, @@ -196,8 +194,7 @@ void api_callback(activity_domain_t domain, uint32_t cid, const void* callback_d rocprofiler_record_tracer_t{ rocprofiler_record_header_t{ ROCPROFILER_TRACER_RECORD, - rocprofiler_record_id_t{ - rocprofiler::GetROCProfilerSingleton()->GetUniqueRecordId()}}, + rocprofiler_record_id_t{rocprofiler_singleton.GetUniqueRecordId()}}, rocprofiler_tracer_external_id_t{0}, ACTIVITY_DOMAIN_HIP_API, rocprofiler_tracer_operation_id_t{cid}, api_data, rocprofiler_tracer_activity_correlation_id_t{data->correlation_id}, diff --git a/projects/rocprofiler/src/pcsampler/core/pc_sampler.cpp b/projects/rocprofiler/src/pcsampler/core/pc_sampler.cpp index b2e4db0736..760322462f 100644 --- a/projects/rocprofiler/src/pcsampler/core/pc_sampler.cpp +++ b/projects/rocprofiler/src/pcsampler/core/pc_sampler.cpp @@ -30,7 +30,6 @@ #include "src/api/rocprofiler_singleton.h" #include "src/pcsampler/session/pc_sampler.h" #include "src/pcsampler/gfxip/gfxip.h" -#include "src/core/hsa/hsa_common.h" #include "src/core/hsa/hsa_support.h" namespace rocprofiler::pc_sampler { @@ -59,17 +58,18 @@ void PCSampler::Start() { using agents_t = std::vector; agents_t agents; - rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn( - [](hsa_agent_t agent, void* arg) { - auto& agents = *reinterpret_cast(arg); - agents.emplace_back(agent); - return HSA_STATUS_SUCCESS; - }, - &agents); + HSASupport_Singleton& hsasupport_singleton = HSASupport_Singleton::GetInstance(); + hsasupport_singleton.GetCoreApiTable().hsa_iterate_agents_fn( + [](hsa_agent_t agent, void *arg){ + auto &agents = *reinterpret_cast(arg); + agents.emplace_back(agent); + return HSA_STATUS_SUCCESS; + }, + &agents); - for (const auto& agent : agents) { - const auto& ai = rocprofiler::hsa_support::GetAgentInfo(agent.handle); - if (ai.getType() != HSA_DEVICE_TYPE_GPU) { + for (const auto &agent : agents) { + const auto& ai = hsasupport_singleton.GetHSAAgentInfo(agent.handle); + if (ai.GetType() != HSA_DEVICE_TYPE_GPU) { continue; } devices_.emplace(agent.handle, gfxip::device_t{pci_system_initialized_, ai}); @@ -89,13 +89,13 @@ void PCSampler::Stop() { } void PCSampler::AddRecord(rocprofiler_record_pc_sample_t& record) { - const auto tool = rocprofiler::GetROCProfilerSingleton(); - const auto session = tool->GetSession(session_id_); + rocprofiler::ROCProfiler_Singleton& rocprofiler_instance = rocprofiler::ROCProfiler_Singleton::GetInstance(); + const auto session = rocprofiler_instance.GetSession(session_id_); const auto buffer = session->GetBuffer(buffer_id_); std::lock_guard lk(session->GetSessionLock()); - record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {tool->GetUniqueRecordId()}}; + record.header = {ROCPROFILER_PC_SAMPLING_RECORD, {rocprofiler_instance.GetUniqueRecordId()}}; buffer->AddRecord(record); } diff --git a/projects/rocprofiler/src/pcsampler/gfxip/gfxip.cpp b/projects/rocprofiler/src/pcsampler/gfxip/gfxip.cpp index 2db1af0060..5899f0c7ea 100644 --- a/projects/rocprofiler/src/pcsampler/gfxip/gfxip.cpp +++ b/projects/rocprofiler/src/pcsampler/gfxip/gfxip.cpp @@ -127,10 +127,10 @@ uint32_t debugfs_ioctl_read_register(const device_t& dev, return value; } -device_t::device_t(const bool pci_inited, const Agent::AgentInfo& info) +device_t::device_t(const bool pci_inited, const HSAAgentInfo& info) : agent_info_(info), pci_memory_(nullptr) { - const auto pci_domain = agent_info_.getPCIDomain(); - const auto pci_location_id = agent_info_.getPCILocationID(); + const auto pci_domain = agent_info_.GetDeviceInfo().getPCIDomain(); + const auto pci_location_id = agent_info_.GetDeviceInfo().getPCILocationID(); std::string name([pci_domain, pci_location_id]() { std::ostringstream out; diff --git a/projects/rocprofiler/src/pcsampler/gfxip/gfxip.h b/projects/rocprofiler/src/pcsampler/gfxip/gfxip.h index 586da05918..7e8dc3b180 100644 --- a/projects/rocprofiler/src/pcsampler/gfxip/gfxip.h +++ b/projects/rocprofiler/src/pcsampler/gfxip/gfxip.h @@ -37,6 +37,7 @@ #include "rocprofiler.h" #include "src/core/hardware/hsa_info.h" +#include "src/core/hsa/hsa_support.h" #include "src/utils/handle.h" #include @@ -99,14 +100,14 @@ static constexpr int HWIP_MAX_INSTANCE = 11; (REG_FIELD_MASK(reg, field) & ((field_val) << REG_FIELD_SHIFT(reg, field)))) struct device_t { - device_t(const bool pci_inited, const Agent::AgentInfo& agent_info); + device_t(const bool pci_inited, const HSAAgentInfo& agent_info); ~device_t(); device_t(const device_t&) = delete; device_t& operator=(const device_t&) = delete; device_t(device_t&&) = default; - const Agent::AgentInfo& agent_info_; + const HSAAgentInfo& agent_info_; struct pci_device* pci_device_; size_t pci_memory_size_; diff --git a/projects/rocprofiler/src/pcsampler/gfxip/gfxip_v9.cpp b/projects/rocprofiler/src/pcsampler/gfxip/gfxip_v9.cpp index 7f85e13c47..bd2e7c311a 100644 --- a/projects/rocprofiler/src/pcsampler/gfxip/gfxip_v9.cpp +++ b/projects/rocprofiler/src/pcsampler/gfxip/gfxip_v9.cpp @@ -104,8 +104,8 @@ void fill_record(const device_t& dev, rocprofiler_record_pc_sample_t* record, ui * comment in rocprofiler::hsa_support::Initialize about using KFD's gpu_id for * more information. */ - record->pc_sample.gpu_id = - rocprofiler_agent_id_t{(uint64_t)rocprofiler::hsa_support::GetAgentInfo(hdl).getIndex()}; + record->pc_sample.gpu_id = rocprofiler_agent_id_t{ + HSASupport_Singleton::GetInstance().GetHSAAgentInfo(hdl).GetDeviceInfo().getGPUId()}; } } // namespace @@ -116,9 +116,9 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) { uint32_t saved_grbm_gfx_index = dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)]; uint32_t data; - for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se) - for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh) - for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) { + for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se) + for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh) + for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) { // Select the SE, SH, and CU. data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, cu); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh); @@ -126,8 +126,8 @@ void read_pc_samples_v9(const device_t& dev, PCSampler* sampler) { dev.pci_memory_[REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX)] = data; // Iterate over all the waves in the compute unit. - for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd) - for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) { + for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd) + for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) { // FatalHalt the wave data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT); data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE); @@ -204,16 +204,16 @@ void read_pc_samples_v9_ioctl(const device_t& dev, PCSampler* sampler) { uint32_t data; - for (uint32_t se = 0; se < dev.agent_info_.getShaderEngineCount(); ++se) - for (uint32_t sh = 0; sh < dev.agent_info_.getShaderArraysPerSE(); ++sh) - for (uint32_t cu = 0; cu < dev.agent_info_.getCUCountPerSH(); ++cu) { + for (uint32_t se = 0; se < dev.agent_info_.GetDeviceInfo().getShaderEngineCount(); ++se) + for (uint32_t sh = 0; sh < dev.agent_info_.GetDeviceInfo().getShaderArraysPerSE(); ++sh) + for (uint32_t cu = 0; cu < dev.agent_info_.GetDeviceInfo().getCUCountPerSH(); ++cu) { ioc.grbm.se = se; ioc.grbm.sh = sh; ioc.grbm.instance = cu; // Iterate over all the waves in the compute unit. - for (uint32_t simd = 0; simd < dev.agent_info_.getSimdCountPerCU(); ++simd) - for (uint32_t wave_id = 0; wave_id < dev.agent_info_.getWaveSlotsPerSimd(); ++wave_id) { + for (uint32_t simd = 0; simd < dev.agent_info_.GetDeviceInfo().getSimdCountPerCU(); ++simd) + for (uint32_t wave_id = 0; wave_id < dev.agent_info_.GetDeviceInfo().getWaveSlotsPerSimd(); ++wave_id) { // FatalHalt the wave data = REG_SET_FIELD(0, SQ_CMD, CMD, SQ_IND_CMD_CMD_SETFATALHALT); data = REG_SET_FIELD(data, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE); diff --git a/projects/rocprofiler/tests-v2/CMakeLists.txt b/projects/rocprofiler/tests-v2/CMakeLists.txt index c57ba0f1ab..8ed4205e82 100644 --- a/projects/rocprofiler/tests-v2/CMakeLists.txt +++ b/projects/rocprofiler/tests-v2/CMakeLists.txt @@ -5,8 +5,9 @@ add_custom_target( check COMMAND ${PROJECT_BINARY_DIR}/run_tests.sh DEPENDS tests) -add_subdirectory(unittests) add_subdirectory(featuretests) add_subdirectory(memorytests) add_subdirectory(microbenchmarks) +add_subdirectory(HSAToolLibrary) +add_subdirectory(unittests) configure_file(run_tests.sh ${PROJECT_BINARY_DIR} COPYONLY) diff --git a/projects/rocprofiler/tests-v2/HSAToolLibrary/CMakeLists.txt b/projects/rocprofiler/tests-v2/HSAToolLibrary/CMakeLists.txt new file mode 100644 index 0000000000..b5a67ba168 --- /dev/null +++ b/projects/rocprofiler/tests-v2/HSAToolLibrary/CMakeLists.txt @@ -0,0 +1,52 @@ +# ############################################################################## +# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ############################################################################## + + + +# Setup unit testing env +# Handle HSA Interception Tool Tests +find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH}) + +find_package( + Clang REQUIRED CONFI + PATHS "${ROCM_PATH}" + PATH_SUFFIXES "llvm/lib/cmake/clang") + +file(GLOB TEST_HSATOOl_SRC_FILES ${PROJECT_SOURCE_DIR}/tests-v2/HSAToolLibrary/*.cpp) + +add_library(test_hsatool_library SHARED ${TEST_HSATOOl_SRC_FILES}) + +target_include_directories(test_hsatool_library PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + +set_target_properties( + test_hsatool_library + PROPERTIES CXX_VISIBILITY_PRESET hidden + DEFINE_SYMBOL TEST_HSA_TOOL_EXPORTS + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/tests-v2 + INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}") + +install( + TARGETS test_hsatool_library LIBRARY + DESTINATION ${CMAKE_INSTALL_LIBDIR}/${ROCPROFILER_NAME} + COMPONENT tests) + +target_link_libraries(test_hsatool_library PRIVATE hsa-runtime64::hsa-runtime64) diff --git a/projects/rocprofiler/src/core/hsa/hsa_common.h b/projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.cpp similarity index 50% rename from projects/rocprofiler/src/core/hsa/hsa_common.h rename to projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.cpp index 3ef2015142..5a95564734 100644 --- a/projects/rocprofiler/src/core/hsa/hsa_common.h +++ b/projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.cpp @@ -18,47 +18,41 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef SRC_CORE_HSA_HSA_COMMON_H_ -#define SRC_CORE_HSA_HSA_COMMON_H_ - -#include -#include -#include -#include -#include - -#include -#include - -#include "rocprofiler.h" -#include "src/core/hardware/hsa_info.h" - -#define ASSERTM(exp, msg) assert(((void)msg, exp)) - -namespace rocprofiler { -namespace hsa_support { +#include "HSATool.h" +extern "C" { -std::vector& GetCPUAgentList(); +/* + @brief The HSA_AMD_TOOL_PRIORITY variable must be a constant value type + initialized by the loader itself, not by code during _init. 'extern const' + seems do that although that is not a guarantee. +*/ -Agent::AgentInfo& GetAgentInfo(decltype(hsa_agent_t::handle) handle); -void SetAgentInfo(decltype(hsa_agent_t::handle) handle, const Agent::AgentInfo& agent_info); -hsa_agent_t GetAgentByIndex(uint64_t agent_index); +TEST_HSA_TOOL_API extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50; +static rocprofiler_onload_callback rocprofiler_onload_callback_call = nullptr; -CoreApiTable& GetCoreApiTable(); -void SetCoreApiTable(const CoreApiTable& table); +/* -AmdExtTable GetAmdExtTable(); -void SetAmdExtTable(AmdExtTable* table); + @brief Callback function called upon loading the HSA. + The function updates the core api table function pointers to point to the + interceptor functions in this file. -hsa_ven_amd_loader_1_01_pfn_t GetHSALoaderApi(); -void SetHSALoaderApi(); +*/ -void ResetMaps(); +TEST_HSA_TOOL_API bool OnLoad(void* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { + rocprofiler_onload_callback_call(table, runtime_version, failed_tool_count, failed_tool_names); + return true; +} -rocprofiler_timestamp_t GetCurrentTimestampNS(); +/* +@brief Callback function upon unloading the HSA. +*/ -} // namespace hsa_support -} // namespace rocprofiler +TEST_HSA_TOOL_API void OnUnload() { printf("\n\nTool is getting unloaded\n\n"); } -#endif // SRC_CORE_HSA_HSA_COMMON_H_ +} // extern "C" + +TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback) { + rocprofiler_onload_callback_call = callback; +} \ No newline at end of file diff --git a/projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.h b/projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.h new file mode 100644 index 0000000000..67097d7f01 --- /dev/null +++ b/projects/rocprofiler/tests-v2/HSAToolLibrary/HSATool.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include +#include +#include +#include + + +/* Placeholder for calling convention and import/export macros */ +#if !defined(TEST_HSA_TOOL_EXPORT_CALL) +#define TEST_HSA_TOOL_EXPORT_CALL +#endif /* !defined (TEST_HSA_TOOL_EXPORT_CALL) */ + +#if !defined(TEST_HSA_TOOL_EXPORT_DECORATOR) +#if defined(__GNUC__) +#define TEST_HSA_TOOL_EXPORT_DECORATOR __attribute__((visibility("default"))) +#elif defined(_MSC_VER) +#define TEST_HSA_TOOL_EXPORT_DECORATOR __declspec(dllexport) +#endif /* defined (_MSC_VER) */ +#endif /* !defined (TEST_HSA_TOOL_EXPORT_DECORATOR) */ + +#if !defined(TEST_HSA_TOOL_IMPORT_DECORATOR) +#if defined(__GNUC__) +#define TEST_HSA_TOOL_IMPORT_DECORATOR +#elif defined(_MSC_VER) +#define TEST_HSA_TOOL_IMPORT_DECORATOR __declspec(dllimport) +#endif /* defined (_MSC_VER) */ +#endif /* !defined (TEST_HSA_TOOL_IMPORT_DECORATOR) */ +#define TEST_HSA_TOOL_EXPORT TEST_HSA_TOOL_EXPORT_DECORATOR TEST_HSA_TOOL_EXPORT_CALL +#define TEST_HSA_TOOL_IMPORT TEST_HSA_TOOL_IMPORT_DECORATOR TEST_HSA_TOOL_IMPORT_CALL +#if defined(TEST_HSA_TOOL_EXPORTS) +#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT +#else /* !defined (TEST_HSA_TOOL_EXPORTS) */ +#define TEST_HSA_TOOL_API TEST_HSA_TOOL_EXPORT +#endif /* !defined (TEST_HSA_TOOL_EXPORTS) */ +typedef int (*rocprofiler_onload_callback)( + void* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names); + + +TEST_HSA_TOOL_API void SetHSACallback(rocprofiler_onload_callback callback); \ No newline at end of file diff --git a/projects/rocprofiler/tests-v2/featuretests/gtests_main.cpp b/projects/rocprofiler/tests-v2/featuretests/gtests_main.cpp index cc85d44e78..a861d36b06 100644 --- a/projects/rocprofiler/tests-v2/featuretests/gtests_main.cpp +++ b/projects/rocprofiler/tests-v2/featuretests/gtests_main.cpp @@ -1,17 +1,14 @@ #include #include "src/core/hardware/hsa_info.h" -//#include "src/core/hsa/hsa_common.h" - -// Entry Point for Gtests Infra int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; // Add line below to disable any problematic test + hsa_init(); testing::GTEST_FLAG(filter) = "-OpenMPTest.*:ProfilerSPMTest.*:ProfilerMQTest.*:ProfilerMPTest.*:MPITest.*"; // Disable ATT test fir gfx10 GPUs until its supported - hsa_init(); // iterate for gpu's hsa_iterate_agents( [](hsa_agent_t agent, void*) { @@ -25,8 +22,9 @@ int main(int argc, char** argv) { } return HSA_STATUS_SUCCESS; }, - nullptr); - // hsa_shut_down(); // Waiting for hsa_shutdown bug to fix - // Append filter above to disable any problematic test - return RUN_ALL_TESTS(); + nullptr); +// Append filter above to disable any problematic test + int res = RUN_ALL_TESTS(); + hsa_shut_down(); + return res; } diff --git a/projects/rocprofiler/tests-v2/run_tests.sh b/projects/rocprofiler/tests-v2/run_tests.sh index 9d860271d9..1a1977e729 100755 --- a/projects/rocprofiler/tests-v2/run_tests.sh +++ b/projects/rocprofiler/tests-v2/run_tests.sh @@ -5,7 +5,11 @@ CURRENT_DIR="$( dirname -- "$0"; )"; echo -e "Running Profiler Tests" echo -e "running unit tests for rocprofiler" -eval ${CURRENT_DIR}/tests-v2/unittests/runUnitTests +eval ${CURRENT_DIR}/tests-v2/unittests/core/runCoreUnitTests + + +echo -e "running unit tests for rocprofiler" +eval ${CURRENT_DIR}/tests-v2/unittests/profiler/runUnitTests echo -e "running feature tests for rocprofiler" eval ${CURRENT_DIR}/tests-v2/featuretests/profiler/runFeatureTests diff --git a/projects/rocprofiler/tests-v2/unittests/CMakeLists.txt b/projects/rocprofiler/tests-v2/unittests/CMakeLists.txt index 432a4988c1..6ceb56ed28 100644 --- a/projects/rocprofiler/tests-v2/unittests/CMakeLists.txt +++ b/projects/rocprofiler/tests-v2/unittests/CMakeLists.txt @@ -1,110 +1,2 @@ -# Setup unit testing env - -find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED) - -enable_testing() -find_package(GTest REQUIRED) - -find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED) - -# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils -set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory) -file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp) - -set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session) -file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp) -file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp) -file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp) -file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp) - -set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) -file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) - -set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) -file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) - -set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils) -file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp) - -set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets) -file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp) - -file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp) - -file(GLOB ROCPROFILER_SRC_PROFILER_FILES - ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp) -file(GLOB ROCPROFILER_TRACER_SRC_FILES - ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp) -file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES - ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp) -file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp) -file(GLOB ROCPROFILER_SRC_CLASS_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp) -file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp) -file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp) -file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp) - -set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES} - ${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES}) - -set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa) -file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp) - -set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues) -file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp) - -set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler) -file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp - ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp) - -# Compiling gtests -file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp) - -file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp) -file(GLOB CORE_COUNTERS_METRICS_SRC_FILES - ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp) -file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp) - -add_executable( - runUnitTests - ${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp - ${CORE_MEMORY_SRC_FILES} - ${CORE_SESSION_SRC_FILES} - ${CORE_FILTER_SRC_FILES} - ${CORE_DEVICE_PROFILING_SRC_FILES} - ${CORE_COUNTERS_SAMPLER_SRC_FILES} - ${CORE_HW_SRC_FILES} - ${CORE_UTILS_SRC_FILES} - ${ROCPROFILER_SPM_SRC_FILES} - ${ROCPROFILER_SRC_FILES} - ${CORE_HSA_SRC_FILES} - ${CORE_HSA_PACKETS_SRC_FILES} - ${CORE_COUNTERS_SRC_FILES} - ${CORE_HSA_QUEUES_SRC_FILES} - ${ROCPROFILER_TRACER_SRC_FILES} - ${ROCPROFILER_ROCTRACER_SRC_FILES} - ${CORE_COUNTERS_METRICS_SRC_FILES} - ${CORE_COUNTERS_MMIO_SRC_FILES} - ${CORE_COUNTERS_PARENT_SRC_FILES} - ${CORE_PC_SAMPLING_FILES}) - -target_include_directories( - runUnitTests - PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc - ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR} - ${PROJECT_BINARY_DIR}/rocprofiler) - -target_compile_definitions( - runUnitTests - PUBLIC AMD_INTERNAL_BUILD - PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) - -target_link_libraries( - runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 - GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl) - -add_dependencies(tests runUnitTests) -install(TARGETS runUnitTests - RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests - COMPONENT tests) -add_test(AllTests runUnitTests) +add_subdirectory(profiler) +add_subdirectory(core) diff --git a/projects/rocprofiler/tests-v2/unittests/core/CMakeLists.txt b/projects/rocprofiler/tests-v2/unittests/core/CMakeLists.txt new file mode 100644 index 0000000000..c85a83382c --- /dev/null +++ b/projects/rocprofiler/tests-v2/unittests/core/CMakeLists.txt @@ -0,0 +1,133 @@ +# ############################################################################## +# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ############################################################################## + +find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED) + +enable_testing() +find_package(GTest REQUIRED) + +# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils +set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory) +file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp) + +set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session) +file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp) +file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp) +file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp) +file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp) + +set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) +file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) + +set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) +file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) + +set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils) +file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp) + +set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets) +file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp) + +file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp) + +file(GLOB ROCPROFILER_SRC_PROFILER_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp) +file(GLOB ROCPROFILER_TRACER_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp) +file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp) + file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp) + file(GLOB ROCPROFILER_SRC_CLASS_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp) + file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp) + file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp) + file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp) + set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES} + ${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES}) + +set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa) +file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp) + +set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues) +file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp) + +set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler) +file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp + ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp) + +# Compiling gtests +file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp) + +file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp) +file(GLOB CORE_COUNTERS_METRICS_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp) +file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp) +set(GTEST_MAIN_DIR ${PROJECT_SOURCE_DIR}/tests-v2/unittests/core) +file(GLOB GTEST_MAIN_SRC_FILE ${GTEST_MAIN_DIR}/gtests_main.cpp) +add_executable( + runCoreUnitTests + ${CORE_MEMORY_SRC_FILES} + ${CORE_SESSION_SRC_FILES} + ${CORE_FILTER_SRC_FILES} + ${CORE_DEVICE_PROFILING_SRC_FILES} + ${CORE_COUNTERS_SAMPLER_SRC_FILES} + ${CORE_HW_SRC_FILES} + ${CORE_UTILS_SRC_FILES} + ${ROCPROFILER_SPM_SRC_FILES} + ${ROCPROFILER_SRC_FILES} + ${CORE_HSA_SRC_FILES} + ${CORE_HSA_PACKETS_SRC_FILES} + ${CORE_COUNTERS_SRC_FILES} + ${CORE_HSA_QUEUES_SRC_FILES} + ${ROCPROFILER_TRACER_SRC_FILES} + ${ROCPROFILER_ROCTRACER_SRC_FILES} + ${CORE_COUNTERS_METRICS_SRC_FILES} + ${CORE_COUNTERS_MMIO_SRC_FILES} + ${CORE_COUNTERS_PARENT_SRC_FILES} + ${CORE_PC_SAMPLING_FILES} + ${GTEST_MAIN_SRC_FILE} + ${CMAKE_CURRENT_SOURCE_DIR}/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/HSASingleton/HSASingleton_unittests.cpp + ) + +target_include_directories( + runCoreUnitTests + PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc + ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR} + ${PROJECT_BINARY_DIR}/rocprofiler) + +target_compile_definitions( + runCoreUnitTests + PUBLIC AMD_INTERNAL_BUILD + PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) + + +target_link_libraries( + runCoreUnitTests PRIVATE rocprofiler_tool test_hsatool_library ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 + GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES}) + + +add_dependencies(tests runCoreUnitTests) +install(TARGETS runCoreUnitTests + RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests + COMPONENT tests) +add_test(AllTests runCoreUnitTests) \ No newline at end of file diff --git a/projects/rocprofiler/tests-v2/unittests/core/HSASingleton/HSASingleton_unittests.cpp b/projects/rocprofiler/tests-v2/unittests/core/HSASingleton/HSASingleton_unittests.cpp new file mode 100644 index 0000000000..221cdaa217 --- /dev/null +++ b/projects/rocprofiler/tests-v2/unittests/core/HSASingleton/HSASingleton_unittests.cpp @@ -0,0 +1,139 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include +#include +#include +#include +#include "api/rocprofiler_singleton.h" +#include "src/core/hsa/hsa_support.h" +#define MAX_THREADS 10000 +struct devices_t { + std::vector cpu_devices; + std::vector gpu_devices; + std::vector other_devices; +}; + +hsa_status_t device_cb_tool(hsa_agent_t agent, void* data) { + hsa_device_type_t device_type; + devices_t* devices = reinterpret_cast(data); + if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type) != HSA_STATUS_SUCCESS) { + std::cout << "hsa_iterate_agents failed" << std::endl; + std::exit(-1); + } + switch (device_type) { + case HSA_DEVICE_TYPE_CPU: + devices->cpu_devices.push_back(agent); + break; + case HSA_DEVICE_TYPE_GPU: + devices->gpu_devices.push_back(agent); + break; + default: + devices->other_devices.push_back(agent); + break; + } + return HSA_STATUS_SUCCESS; +} +void get_hsa_agents_list_tool(devices_t* device_list) { + // Enumerate the agents. + if (hsa_iterate_agents(device_cb_tool, device_list) != HSA_STATUS_SUCCESS) { + std::cout << "hsa_iterate_agents failed" << std::endl; + std::exit(-1); + } +} + + +class TestHSASupportSingleton { + public: + uint64_t ref_address; + TestHSASupportSingleton() { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + ref_address = reinterpret_cast(&hsasupport_singleton); + } +}; + +void instanciateHSASupportSingleton(int index, uint64_t *ref_array) { + TestHSASupportSingleton test; + *(ref_array+index) = test.ref_address; +} + +//Add more threads here +TEST(WhenInvokingHSASingleton, HSASupportSingletonInstanciation) { + std::vector threads; + uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS)); + for(int i = 0; i < MAX_THREADS; i++) { + threads.emplace_back(instanciateHSASupportSingleton, i, &refaddress[0]); + } + for (auto&& thread : threads) thread.join(); + uint64_t ref_addr = refaddress[0]; + for(int i = 1; i < MAX_THREADS; i++) + EXPECT_EQ(ref_addr, refaddress[i]) << "HSASingleton Instanciation failed"; +} + + +TEST(WhenInvokingGetHSAInitialize, TestHSASupportSingleton) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + devices_t device_list; + get_hsa_agents_list_tool(&device_list); + for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) { + [[maybe_unused]]rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle); + } + EXPECT_EQ(hsasupport_singleton.gpu_agents.size(), device_list.gpu_devices.size()) << "HSAInitialize failed"; +} + +TEST(WhenInvokingGetHSAAgentInfo, TestHSASupportSingleton) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + devices_t device_list; + get_hsa_agents_list_tool(&device_list); + for(auto it = device_list.gpu_devices.begin(); it != device_list.gpu_devices.end(); it++) { + rocprofiler::HSAAgentInfo& agent_info = hsasupport_singleton.GetHSAAgentInfo(it->handle); + uint32_t gpu_id; + char name[64]; + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn( + *it, (hsa_agent_info_t)(HSA_AMD_AGENT_INFO_DRIVER_UID), &gpu_id); + hsasupport_singleton.GetCoreApiTable().hsa_agent_get_info_fn(*it, HSA_AGENT_INFO_NAME, name); + + EXPECT_EQ(agent_info.GetDeviceInfo().getGPUId(), gpu_id) << "HSAAgentInfo has incorrect gpu id for the agent: " << it->handle; + EXPECT_EQ(strcmp(agent_info.GetDeviceInfo().getName().data(), name), 0) << "HSAAgentInfo has incorrect gpu name for the agent: " << it->handle; + } +} + +TEST(WhenInvokingQueueInterceptors, TestQueueInterceptors) { + + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsa_queue_t* queue1 = nullptr, *queue2 = nullptr; + + hsa_status_t status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, + UINT32_MAX, &queue1); + + + EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue create interceptor failed"; + status = hsa_queue_create(hsasupport_singleton.gpu_agents[0], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, + UINT32_MAX, &queue2); + status = hsa_queue_destroy(queue1); + EXPECT_EQ(status, HSA_STATUS_SUCCESS) << "Queue destroy interceptor failed"; +} + diff --git a/projects/rocprofiler/tests-v2/unittests/core/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp b/projects/rocprofiler/tests-v2/unittests/core/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp new file mode 100644 index 0000000000..cfb33d0c43 --- /dev/null +++ b/projects/rocprofiler/tests-v2/unittests/core/ROCProfiler_Singleton/ROCProfiler_Singleton_unittests.cpp @@ -0,0 +1,207 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include +#include +#include + +#include +#include "api/rocprofiler_singleton.h" +#include "src/core/hsa/hsa_support.h" + +namespace fs = std::experimental::filesystem; +using namespace std::string_literals; + +#define MAX_THREADS 10000 +TEST(WhenTestingDeviceInfo, TestFailFatal) { + fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes"); + fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes"; + if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str()); + for (auto const& dirp_entry : fs::directory_iterator{dirp}) { + fs::path node_path = dirp_entry.path(); + long long node_id = std::stoll(std::string(dirp_entry.path().stem().string())); + fs::path gpu_path = node_path / "gpu_id"; + std::ifstream gpu_id_file(gpu_path); + std::string gpu_id_str; + long long gpu_id = 0; + if (gpu_id_file.is_open()) { + gpu_id_file >> gpu_id_str; + if (!gpu_id_str.empty()) { + gpu_id = std::stoll(gpu_id_str); + if (gpu_id == 0) { + EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, gpu_id), ""); + break; + } + } + } + } +} + +TEST(WhenTestingDeviceInfo, DeviceInfoReadSuccessfully) { + fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes"); + fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes"; + if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str()); + uint64_t gpu_id = 0; + uint32_t wave_front_size = 0; + uint32_t location_id = 0; + uint32_t domain = 0; + uint32_t shader_arrays_per_se = 0; + [[maybe_unused]] uint32_t max_waves_per_simd = 0; + uint32_t array_count = 0; + uint32_t se_num = 0; + uint32_t cu_num = 0; + uint32_t cu_per_simd_array = 0; + uint32_t simd_per_cu = 0; + uint64_t unique_gpu_id = 0; + uint32_t xcc_num = 1; + uint32_t compute_units_per_sh = 0; + long long topology_id = 0; + uint32_t waves_per_cu = 0; + uint32_t wave_slots_per_simd = 0; + rocprofiler::ROCProfiler_Singleton& rocprofiler_instance = + rocprofiler::ROCProfiler_Singleton::GetInstance(); + for (auto const& dirp_entry : fs::directory_iterator{dirp}) { + fs::path node_path = dirp_entry.path(); + topology_id = std::stoll(dirp_entry.path().stem().string()); + fs::path gpu_path = node_path / "gpu_id"; + std::ifstream gpu_id_file(gpu_path.c_str()); + std::string gpu_id_str; + if (gpu_id_file.is_open()) { + gpu_id_file >> gpu_id_str; + if (!gpu_id_str.empty()) { + gpu_id = std::stoll(gpu_id_str); + if (gpu_id > 0) { + const Agent::DeviceInfo& device_info = rocprofiler_instance.GetDeviceInfo(gpu_id); + fs::path properties_path = node_path / "properties"; + std::ifstream props_ifs(properties_path); + if (!props_ifs.is_open()) + rocprofiler::fatal("Could not open %s/properties", properties_path.c_str()); + std::string prop_name; + uint64_t prop_value; + EXPECT_TRUE(gpu_id == device_info.getGPUId()); + while (props_ifs >> prop_name >> prop_value) { + if (prop_name == "wave_front_size") + wave_front_size = static_cast(prop_value); + else if (prop_name == "array_count") + array_count = static_cast(prop_value); + else if (prop_name == "simd_per_cu") + simd_per_cu = static_cast(prop_value); + else if (prop_name == "location_id") + location_id = static_cast(prop_value); + else if (prop_name == "domain") + domain = static_cast(prop_value); + else if (prop_name == "simd_arrays_per_engine") + shader_arrays_per_se = static_cast(prop_value); + else if (prop_name == "max_waves_per_simd") + max_waves_per_simd = static_cast(prop_value); + else if (prop_name == "cu_per_simd_array") + cu_per_simd_array = static_cast(prop_value); + else if (prop_name == "unique_id") + unique_gpu_id = static_cast(prop_value); + else if (prop_name == "num_xcc") + xcc_num = static_cast(prop_value); + } + se_num = array_count / shader_arrays_per_se; + cu_num = cu_per_simd_array * array_count; + waves_per_cu = 8 * simd_per_cu; + compute_units_per_sh = cu_num / (se_num * shader_arrays_per_se); + wave_slots_per_simd = waves_per_cu / simd_per_cu; + + + EXPECT_TRUE(wave_front_size == device_info.getMaxWaveSize()) + << "Device Info has incorrect wave_front_size "; + EXPECT_TRUE(simd_per_cu == device_info.getSimdCountPerCU()) + << "Device Info has incorrect simd_per_cu"; + EXPECT_TRUE(location_id == device_info.getPCILocationID()) + << "Device Info has incorrect location_id"; + EXPECT_TRUE(se_num == device_info.getShaderEngineCount()) + << "Device Info has incorrect se_num"; + EXPECT_TRUE(waves_per_cu == device_info.getMaxWavesPerCU()) + << "Device Info has incorrect waves_per_cu"; + + EXPECT_TRUE(domain == device_info.getPCIDomain()) << "Device Info has incorrect domain"; + EXPECT_TRUE(shader_arrays_per_se == device_info.getShaderArraysPerSE()) + << "Device Info has incorrect shader_arrays_per_se"; + EXPECT_TRUE(cu_num == device_info.getCUCount()) << "Device Info has incorrect cu_num"; + EXPECT_TRUE(compute_units_per_sh == device_info.getCUCountPerSH()) + << "Device Info has incorrect compute_units_per_sh "; + EXPECT_TRUE(wave_slots_per_simd == device_info.getWaveSlotsPerSimd()) + << "Device Info has incorrect wave_slots_per_simd "; + EXPECT_TRUE(unique_gpu_id == device_info.getUniqueGPUId()) + << "Device Info has incorrect unique_gpu_id "; + EXPECT_TRUE(xcc_num == device_info.getXccCount()) << "Device Info has incorrect xcc_num "; + ; + EXPECT_TRUE(static_cast(topology_id) == device_info.getNumaNode()) + << "Device Info has incorrect topology_id"; + } + } + } + } +} + +TEST(WhenTestingDeviceInfo, GetDeviceInfoFail) { + fs::directory_entry dirp("/sys/class/kfd/kfd/topology/nodes"); + fs::path sysfs_nodes_path = "/sys/class/kfd/kfd/topology/nodes"; + if (!fs::exists(sysfs_nodes_path)) rocprofiler::fatal("Could not opendir `%s'", sysfs_nodes_path.c_str()); + uint64_t node_id = 0; + for ([[maybe_unused]] auto const& dirp_entry : fs::directory_iterator{dirp}) { + node_id++; + } + node_id++; + fs::path node_path = sysfs_nodes_path / std::to_string(node_id); + std::stringstream error; + error << "Could not opendir `" << node_path.c_str() << "'"; + + EXPECT_DEATH(Agent::DeviceInfo deviceInfo(node_id, 1), error.str().c_str()); +} + + +class TestRocprofilerSingleton { + public: + uintptr_t ref_address; + TestRocprofilerSingleton() { + rocprofiler::ROCProfiler_Singleton& rocprofiler = + rocprofiler::ROCProfiler_Singleton::GetInstance(); + ref_address = reinterpret_cast(&rocprofiler); + } +}; + +void instantiateRocprofiler(uint64_t *target) { + TestRocprofilerSingleton test; + *target = test.ref_address; + +} + + +//Add more threads here +TEST(WhenInvokingRocprofilerSingleton, RocprofilerSingletonInstanciation) { + std::vector threads; + uint64_t *refaddress = (uint64_t*)malloc(sizeof(uint64_t)*(MAX_THREADS)); + for(int i = 0; i < MAX_THREADS; i++) { + threads.emplace_back(instantiateRocprofiler, &refaddress[i]); + } + for (auto&& thread : threads) thread.join(); + uint64_t ref_addr = refaddress[0]; + for(int i = 1; i < MAX_THREADS; i++) + EXPECT_EQ(ref_addr, refaddress[i]) << "RocprofilerSingleton Instanciation failed"; + free(refaddress); +} diff --git a/projects/rocprofiler/tests-v2/unittests/core/gtests_main.cpp b/projects/rocprofiler/tests-v2/unittests/core/gtests_main.cpp new file mode 100644 index 0000000000..e2110cee4c --- /dev/null +++ b/projects/rocprofiler/tests-v2/unittests/core/gtests_main.cpp @@ -0,0 +1,114 @@ + +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include "src/core/hardware/hsa_info.h" +#include "api/rocprofiler_singleton.h" +#include "src/core/hsa/hsa_support.h" +#include "tests-v2/HSAToolLibrary/HSATool.h" + +// used for dl_addr to locate the running +// path for executable +int main(int argc, char** argv); +std::string metrics_path; +std::string running_path; + +bool is_installed_path() { + std::string path; + char* real_path; + Dl_info dl_info; + + if (0 != dladdr(reinterpret_cast(main), &dl_info)) { + path = dl_info.dli_fname; + real_path = realpath(path.c_str(), NULL); + if (real_path == nullptr) { + throw(std::string("Error! in extracting real path")); + } + path.clear(); // reset path + path.append(real_path); + if (path.find("/opt") != std::string::npos) { + return true; + } + } + return false; +} + + +static void init_test_path() { + metrics_path = "libexec/rocprofiler/counters/derived_counters.xml"; + if (is_installed_path()) { + running_path = "share/rocprofiler/tests/runCoreUnitTests"; + } else { + running_path = "tests-v2/unittests/core/runCoreUnitTests"; + } +} + + +// This function returns the running path of executable +std::string GetRunningPath(std::string string_to_erase) { + std::string path; + const char* real_path; + Dl_info dl_info; + + if (0 != dladdr(reinterpret_cast(main), &dl_info)) { + std::string to_erase = string_to_erase; + path = dl_info.dli_fname; + real_path = realpath(path.c_str(), NULL); + if (real_path == nullptr) { + throw(std::string("Error! in extracting real path")); + } + path.clear(); // reset path + path.append(real_path); + + size_t pos = path.find(to_erase); + if (pos != std::string::npos) path.erase(pos, to_erase.length()); + } else { + throw(std::string("Error! in extracting real path")); + } + + return path; +} + + +int HSATool_onload_callback(void* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { + rocprofiler::HSASupport_Singleton& hsasupport_singleton = + rocprofiler::HSASupport_Singleton::GetInstance(); + hsasupport_singleton.HSAInitialize(reinterpret_cast(table)); + return true; +} + + +int main(int argc, char** argv) { + init_test_path(); + SetHSACallback(HSATool_onload_callback); + metrics_path = "libexec/rocprofiler/counters/derived_counters.xml"; + std::string app_path = GetRunningPath(running_path); + std::stringstream gfx_path; + gfx_path << app_path << metrics_path; + setenv("ROCPROFILER_METRICS_PATH", gfx_path.str().c_str(), true); + testing::InitGoogleTest(&argc, argv); + testing::FLAGS_gtest_death_test_style = "threadsafe"; + hsa_init(); + int status = RUN_ALL_TESTS(); + hsa_shut_down(); + return status; +} diff --git a/projects/rocprofiler/tests-v2/unittests/profiler/CMakeLists.txt b/projects/rocprofiler/tests-v2/unittests/profiler/CMakeLists.txt new file mode 100644 index 0000000000..393be7f1b7 --- /dev/null +++ b/projects/rocprofiler/tests-v2/unittests/profiler/CMakeLists.txt @@ -0,0 +1,110 @@ +# Setup unit testing env + +find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED) + +enable_testing() +find_package(GTest REQUIRED) + +find_library(GDB rocm-dbgapi PATHS ${ROCM_PATH} REQUIRED) + +# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils +set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory) +file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp) + +set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session) +file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp) +file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp) +file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp) +file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp) + +set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) +file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) + +set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware) +file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp) + +set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils) +file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp) + +set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets) +file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp) + +file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp) + +file(GLOB ROCPROFILER_SRC_PROFILER_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp) +file(GLOB ROCPROFILER_TRACER_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp) +file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp) +file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/*.cpp) +file(GLOB ROCPROFILER_SRC_CLASS_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/rocprofiler_singleton.cpp) +file(GLOB ROCPROFILER_ISA_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/isa_capture/*.cpp) +file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp) +file(GLOB ROCPROFILER_SRC_API_FILES ${PROJECT_SOURCE_DIR}/src/api/*.cpp) +set(ROCPROFILER_SRC_FILES ${ROCPROFILER_SRC_API_FILES} ${ROCPROFILER_ATT_SRC_FILES} + ${ROCPROFILER_ISA_SRC_FILES} ${ROCPROFILER_SRC_PROFILER_FILES} ${ROCPROFILER_ATT_SRC_FILES}) + +set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa) +file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp) + +set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues) +file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/*.cpp) + +set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler) +file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp + ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp ${CORE_PC_SAMPLING_DIR}/session/*.cpp) + +# Compiling gtests +file(GLOB ROCPROFILER_TOOL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/tools/tool.cpp) + +file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp) +file(GLOB CORE_COUNTERS_METRICS_SRC_FILES + ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp) +file(GLOB CORE_COUNTERS_MMIO_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/mmio/*.cpp) +add_executable( + runUnitTests + ${CMAKE_CURRENT_SOURCE_DIR}/profiler_gtest.cpp + ${CORE_MEMORY_SRC_FILES} + ${CORE_SESSION_SRC_FILES} + ${CORE_FILTER_SRC_FILES} + ${CORE_DEVICE_PROFILING_SRC_FILES} + ${CORE_COUNTERS_SAMPLER_SRC_FILES} + ${CORE_HW_SRC_FILES} + ${CORE_UTILS_SRC_FILES} + ${ROCPROFILER_SPM_SRC_FILES} + ${ROCPROFILER_SRC_FILES} + ${CORE_HSA_SRC_FILES} + ${CORE_HSA_PACKETS_SRC_FILES} + ${CORE_COUNTERS_SRC_FILES} + ${CORE_HSA_QUEUES_SRC_FILES} + ${ROCPROFILER_TRACER_SRC_FILES} + ${ROCPROFILER_ROCTRACER_SRC_FILES} + ${CORE_COUNTERS_METRICS_SRC_FILES} + ${CORE_COUNTERS_MMIO_SRC_FILES} + ${CORE_COUNTERS_PARENT_SRC_FILES} + ${CORE_PC_SAMPLING_FILES} + ${GTEST_MAIN_SRC_FILE} +) + +target_include_directories( + runUnitTests + PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/inc + ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR} + ${PROJECT_BINARY_DIR}/rocprofiler) + +target_compile_definitions( + runUnitTests + PUBLIC AMD_INTERNAL_BUILD + PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) + +target_link_libraries( + runUnitTests PRIVATE rocprofiler_tool ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 + GTest::gtest GTest::gtest_main stdc++fs ${PCIACCESS_LIBRARIES} ${GDB} dw elf c dl) + +add_dependencies(tests runUnitTests) +install(TARGETS runUnitTests + RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests + COMPONENT tests) +add_test(AllTests runUnitTests) \ No newline at end of file diff --git a/projects/rocprofiler/tests-v2/unittests/profiler_gtest.cpp b/projects/rocprofiler/tests-v2/unittests/profiler/profiler_gtest.cpp similarity index 94% rename from projects/rocprofiler/tests-v2/unittests/profiler_gtest.cpp rename to projects/rocprofiler/tests-v2/unittests/profiler/profiler_gtest.cpp index 9c62ccd4d3..6145cd04cd 100644 --- a/projects/rocprofiler/tests-v2/unittests/profiler_gtest.cpp +++ b/projects/rocprofiler/tests-v2/unittests/profiler/profiler_gtest.cpp @@ -30,27 +30,12 @@ #include "core/session/session.h" #include "utils/helper.h" - /* * ############################################### * ################TESTING HSA_INFO############### * ############################################### */ -TEST(WhenTestingAgentInfoGetterSetters, TestRunsSuccessfully) { - Agent::AgentInfo agent_info = Agent::AgentInfo(); - char gpu_name[] = "gfx10"; - agent_info.setName(gpu_name); - agent_info.setIndex(0); - agent_info.setType(hsa_device_type_t::HSA_DEVICE_TYPE_GPU); - - EXPECT_EQ(agent_info.getName(), gpu_name); - EXPECT_EQ(agent_info.getIndex(), 0); - EXPECT_EQ(agent_info.getType(), hsa_device_type_t::HSA_DEVICE_TYPE_GPU); - - Agent::CounterHardwareInfo hw_info(0, "GRBM"); - EXPECT_TRUE(getHardwareInfo(0, "GRBM", &hw_info)); -} void buffer_callback_fun(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end, @@ -66,7 +51,7 @@ void buffer_callback_fun(const rocprofiler_record_header_t* begin, // A lot have changed in the class, since this test was written // Need to rewrite all the test cases again. -TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) { +TEST(WhenAddingARecordToBuffer, DISABLED_RecordGetsAddedSuccefully) { Memory::GenericBuffer* buffer = new Memory::GenericBuffer( rocprofiler_session_id_t{0}, rocprofiler_buffer_id_t{0}, 0x8000, buffer_callback_fun); @@ -285,7 +270,7 @@ void (*callback_fun)(const rocprofiler_record_header_t* begin, TEST(WhenTestingCounterCollectionMode, TestSucceeds) { rocprofiler_session_id_t session_id; - rocprofiler::ROCProfiler_Singleton toolobj; + rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance(); session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE); rocprofiler_filter_id_t filter_id = toolobj.GetSession(session_id) @@ -304,7 +289,7 @@ TEST(WhenTestingCounterCollectionMode, TestSucceeds) { TEST(WhenTestingTimeStampCollectionMode, TestSucceeds) { rocprofiler_session_id_t session_id; - rocprofiler::ROCProfiler_Singleton toolobj; + rocprofiler::ROCProfiler_Singleton& toolobj = rocprofiler::ROCProfiler_Singleton::GetInstance(); session_id = toolobj.CreateSession(ROCPROFILER_NONE_REPLAY_MODE); rocprofiler_filter_id_t filter_id = toolobj.GetSession(session_id)