From da6feb0220e3be9e7bb25b16ba625f8b460ede8e Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 22:39:22 -0600 Subject: [PATCH] hsa_rsrc_factory sync [ROCm/roctracer commit: 05315465cfa0510558caa418b4cb678e9ea8d674] --- .../roctracer/src/util/hsa_rsrc_factory.cpp | 90 +++++++++++++--- .../roctracer/src/util/hsa_rsrc_factory.h | 100 +++++++++++++++++- 2 files changed, 176 insertions(+), 14 deletions(-) diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.cpp b/projects/roctracer/src/util/hsa_rsrc_factory.cpp index ccb1cd9de3..e1ef92683e 100644 --- a/projects/roctracer/src/util/hsa_rsrc_factory.cpp +++ b/projects/roctracer/src/util/hsa_rsrc_factory.cpp @@ -44,9 +44,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#include "util/exception.h" -#include "util/logger.h" - namespace util { // Callback function to get available in the system agents @@ -149,6 +146,11 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CHECK_STATUS("HSA timer allocation failed", (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + // Time correlation + const uint32_t corr_iters = 1000; + CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters); + CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); + // System timeout timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? 
timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } @@ -192,6 +194,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; + hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; @@ -230,6 +234,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; hsa_api_.hsa_executable_freeze = hsa_executable_freeze; hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; + hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; hsa_api_.hsa_system_get_info = hsa_system_get_info; hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; @@ -336,6 +342,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); + // GFX8 and GFX9 SGPR/VGPR block sizes + agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 
1 : 2; + agent_info->sgpr_block_size = 8; + agent_info->vgpr_block_size = 4; + // Set GPU index agent_info->dev_index = gpu_list_.size(); gpu_list_.push_back(agent_info); @@ -508,22 +519,25 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s } // Wait signal -void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + const hsa_signal_value_t exp_value = signal_value - 1; + hsa_signal_value_t ret_value = signal_value; while (1) { - const hsa_signal_value_t signal_value = - hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); - if (signal_value == 0) { - break; - } else { - if (signal_value == 1) WARN_LOGGING("signal waiting..."); - else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); + ret_value = + hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + if (ret_value == exp_value) break; + if (ret_value != signal_value) { + std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value + << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + abort(); } } + return ret_value; } // Wait signal with signal value restore void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { - SignalWait(signal); + SignalWait(signal, signal_value); hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } @@ -536,7 +550,7 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src CHECK_STATUS("hsa_signal_create()", status); status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); CHECK_STATUS("hsa_amd_memory_async_copy()", status); - SignalWait(s); + SignalWait(s, 1); status = hsa_api_.hsa_signal_destroy(s); 
CHECK_STATUS("hsa_signal_destroy()", status); } @@ -680,9 +694,59 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s return write_idx; } +const char* HsaRsrcFactory::GetKernelName(uint64_t addr) { + std::lock_guard lck(mutex_); + const bool found = (symbols_map_ != NULL) && (symbols_map_->count(addr) != 0); // symbols_map_ starts as NULL and is only allocated by the freeze interceptor + if (!found) { + fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%llx) is not found\n", (unsigned long long)addr); + abort(); + } + return strdup(symbols_map_->find(addr)->second); // returns a copy made by strdup() +} + +void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { + std::lock_guard lck(mutex_); + executable_tracking_on_ = true; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; +} + +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { + hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; + hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + CHECK_STATUS("Error in getting symbol info", status); + if (value == HSA_SYMBOL_KIND_KERNEL) { + uint64_t addr = 0; + uint32_t len = 0; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + CHECK_STATUS("Error in getting kernel object", status); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + CHECK_STATUS("Error in getting name len", status); + char *name = new char[len + 1]; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); + CHECK_STATUS("Error in getting kernel name", status); + name[len] = 0; + auto ret = symbols_map_->insert({addr, name}); + if (ret.second == false) { + delete[] ret.first->second; + ret.first->second = name; + } + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { + std::lock_guard lck(mutex_); + if (symbols_map_ == NULL) 
symbols_map_ = new symbols_map_t; + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + CHECK_STATUS("Error in iterating executable symbols", status); + return hsa_api_.hsa_executable_freeze(executable, options); +} + std::atomic HsaRsrcFactory::instance_{}; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; hsa_pfn_t HsaRsrcFactory::hsa_api_{}; +bool HsaRsrcFactory::executable_tracking_on_ = false; +HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL; } // namespace util diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.h b/projects/roctracer/src/util/hsa_rsrc_factory.h index 51824a5212..c52715d4e6 100644 --- a/projects/roctracer/src/util/hsa_rsrc_factory.h +++ b/projects/roctracer/src/util/hsa_rsrc_factory.h @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -94,6 +95,8 @@ struct hsa_pfn_t { decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; decltype(hsa_executable_freeze)* hsa_executable_freeze; decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; + decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; + decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; decltype(hsa_system_get_info)* hsa_system_get_info; decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; @@ -159,6 +162,11 @@ struct AgentInfo { // Number of Shader Arrays Per Shader Engines in Gpu uint32_t shader_arrays_per_se; + + // SGPR/VGPR block sizes + uint32_t sgpr_block_dflt; + uint32_t sgpr_block_size; + uint32_t vgpr_block_size; }; // HSA timer class @@ -169,6 +177,12 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; + enum time_id_t { + TIME_ID_CLOCK_REALTIME = 0, + TIME_ID_CLOCK_MONOTONIC = 1, + TIME_ID_NUMBER + }; + 
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t sysclock_hz = 0; hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); @@ -184,6 +198,11 @@ class HsaTimer { return timestamp_t((freq_t)time / sysclock_factor_); } + // Method for timespec/ns conversion + timestamp_t timespec_to_ns(const timespec& time) const { + return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; + } + // Return timestamp in 'ns' timestamp_t timestamp_ns() const { timestamp_t sysclock; @@ -192,6 +211,54 @@ class HsaTimer { return sysclock_to_ns(sysclock); } + // Return time in 'ns' + timestamp_t clocktime_ns(clockid_t clock_id) const { + timespec time; + clock_gettime(clock_id, &time); + return timespec_to_ns(time); + } + + // Return pair of correlated values of profiling timestamp and time with + // correlation error (all three in 'ns') for a given time ID and number of iterations + void correlated_pair_ns(time_id_t time_id, uint32_t iters, + timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) { + clockid_t clock_id = 0; + switch (time_id) { // select the POSIX clock from the requested time ID, not from the zeroed local + case TIME_ID_CLOCK_REALTIME: + clock_id = CLOCK_REALTIME; + break; + case TIME_ID_CLOCK_MONOTONIC: + clock_id = CLOCK_MONOTONIC; + break; + default: + CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); + } + + std::vector<timestamp_t> ts_vec(iters); + std::vector<timespec> tm_vec(iters); + const uint32_t steps = (iters > 1) ? (iters - 1) : 1; // keeps the error divisor non-zero for a single sample + + for (uint32_t i = 0; i < iters; ++i) { + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); + clock_gettime(clock_id, &tm_vec[i]); + } + + const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); + const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); + const timestamp_t error = (sysclock_to_ns(ts_vec.back()) - ts_base) / (2 * steps); // half of the average sampling step, in ns + + timestamp_t ts_accum = 0; + timestamp_t tm_accum = 0; + for (uint32_t i = 0; i < iters; ++i) { + ts_accum += (sysclock_to_ns(ts_vec[i]) - ts_base); // samples are raw sysclock values while ts_base is already ns + tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); + } + + *timestamp_v = 
(ts_accum / iters) + ts_base + error; + *time_v = (tm_accum / iters) + tm_base; + *error_v = error; + } + private: // Timestamp frequency factor freq_t sysclock_factor_; @@ -293,7 +360,7 @@ class HsaRsrcFactory { uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); // Wait signal - void SignalWait(const hsa_signal_t& signal) const; + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; // Wait signal with signal value restore void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; @@ -322,6 +389,11 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet); static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); + // Enable executables loading tracking + static bool IsExecutableTracking() { return executable_tracking_on_; } + static void EnableExecutableTracking(HsaApiTable* table); + static const char* GetKernelName(uint64_t addr); + // Initialize HSA API table void static InitHsaApiTable(HsaApiTable* table); static const hsa_pfn_t* HsaApi() { return &hsa_api_; } @@ -346,6 +418,21 @@ class HsaRsrcFactory { if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); } + void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) { + timestamp_t timestamp_v = 0; + timestamp_t time_v = 0; + timestamp_t error_v = 0; + timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v); + time_shift_[time_id] = time_v - timestamp_v; + time_error_[time_id] = error_v; + } + + hsa_status_t GetTime(uint32_t time_id, uint64_t value, uint64_t* time) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *time = value + time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + private: // System agents iterating callback static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); @@ -386,6 +473,13 @@ class HsaRsrcFactory { // System agents map 
std::map agent_map_; + // Executables loading tracking + typedef std::map symbols_map_t; + static symbols_map_t* symbols_map_; + static bool executable_tracking_on_; + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + // HSA runtime API table static hsa_pfn_t hsa_api_; @@ -403,6 +497,10 @@ class HsaRsrcFactory { // HSA timer HsaTimer* timer_; + // Time shift array to support time conversion + timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; + timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; + // CPU/kern-arg memory pools hsa_amd_memory_pool_t *cpu_pool_; hsa_amd_memory_pool_t *kern_arg_pool_;