diff --git a/projects/roctracer/src/roctracer/hsa_support.cpp b/projects/roctracer/src/roctracer/hsa_support.cpp
index cf7cbc6040..c1e48aee22 100644
--- a/projects/roctracer/src/roctracer/hsa_support.cpp
+++ b/projects/roctracer/src/roctracer/hsa_support.cpp
@@ -25,11 +25,11 @@
 #include "memory_pool.h"
 #include "roctracer.h"
 #include "roctracer_hsa.h"
-#include "tracker.h"
 #include "util/callback_table.h"
 #include "util/logger.h"
 
 #include
+#include
 #include
 #include
 #include
@@ -58,6 +58,140 @@ struct AgentInfo {
 };
 std::unordered_map agent_info_map;
 
+// Tracker attaches a proxy HSA signal to a tracked operation. When the proxy signal's async
+// handler runs, the entry's timestamps are filled in, entry->handler is called, and the original
+// signal (if its handle is non-zero) receives the proxy's timestamps and the completion value.
+class Tracker {
+ public:
+  // States stored in entry_t::valid.
+  enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
+
+  enum entry_type_t {
+    DFLT_ENTRY_TYPE = 0,
+    API_ENTRY_TYPE = 1,
+    COPY_ENTRY_TYPE = 2,
+    KERNEL_ENTRY_TYPE = 3,
+    NUM_ENTRY_TYPE = 4
+  };
+
+  struct entry_t {
+    // NOTE(review): the atomic's value type is not visible in this patch; uint32_t is assumed.
+    std::atomic<uint32_t> valid;  // ENTRY_INV / ENTRY_INIT / ENTRY_COMPL
+    entry_type_t type;
+    uint64_t correlation_id;
+    roctracer_timestamp_t begin;  // begin timestamp, ns
+    roctracer_timestamp_t end;    // end timestamp, ns
+    hsa_agent_t agent;
+    uint32_t dev_index;
+    hsa_signal_t orig;                // signal passed to Enable()
+    hsa_signal_t signal;              // proxy signal created by Enable()
+    void (*handler)(const entry_t*);  // invoked by Complete(), which asserts it is non-null
+    MemoryPool* pool;
+    union {
+      struct {
+      } copy;
+      struct {
+        const char* name;
+        hsa_agent_t agent;
+        uint32_t tid;
+      } kernel;
+    };
+  };
+
+  // Add tracker entry: fills in `entry`, creates its proxy signal and registers Handler() for the
+  // condition "signal value < 1". The handler and correlation_id fields are not touched here.
+  inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
+                            entry_t* entry) {
+    // Creating a new tracker entry
+    entry->type = type;
+    entry->agent = agent;
+    entry->dev_index = 0;  // hsa_rsrc->GetAgentInfo(agent)->dev_index;
+    entry->orig = signal;
+    entry->valid.store(ENTRY_INIT, std::memory_order_release);
+
+    // Creating a proxy signal
+    hsa_status_t status = saved_core_api.hsa_signal_create_fn(1, 0, nullptr, &(entry->signal));
+    if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_signal_create failed");
+    status = saved_amd_ext_api.hsa_amd_signal_async_handler_fn(
+        entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
+    if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_amd_signal_async_handler failed");
+  }
+
+  // Delete tracker entry: destroys the proxy signal and marks the entry invalid. The entry
+  // itself is not freed here.
+  inline static void Disable(entry_t* entry) {
+    saved_core_api.hsa_signal_destroy_fn(entry->signal);
+    entry->valid.store(ENTRY_INV, std::memory_order_release);
+  }
+
+ private:
+  // Entry completion: records timestamps, calls entry->handler, forwards completion to the
+  // original signal when there is one, then destroys the proxy signal and deletes the entry.
+  inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
+    // Nanoseconds per timestamp tick, computed on first use. The integer division truncates, so
+    // the period is exact only when the reported frequency divides 1e9.
+    static const roctracer_timestamp_t sysclock_period = []() {
+      uint64_t sysclock_hz = 0;
+      hsa_status_t status =
+          saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
+      if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_info failed");
+      if (sysclock_hz == 0) {
+        // Dividing by a zero frequency is undefined behaviour; report it instead.
+        FATAL_LOGGING("hsa_system_get_info returned a zero timestamp frequency");
+        return uint64_t{0};  // only reached if FATAL_LOGGING returns
+      }
+      return uint64_t{1000000000} / sysclock_hz;
+    }();
+
+    if (entry->type == COPY_ENTRY_TYPE) {
+      hsa_amd_profiling_async_copy_time_t async_copy_time{};
+      hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_get_async_copy_time_fn(
+          entry->signal, &async_copy_time);
+      if (status != HSA_STATUS_SUCCESS)
+        FATAL_LOGGING("hsa_amd_profiling_get_async_copy_time failed");
+      entry->begin = async_copy_time.start * sysclock_period;
+      entry->end = async_copy_time.end * sysclock_period;
+    } else {
+      assert(false && "should not reach here");
+    }
+
+    hsa_signal_t orig = entry->orig;
+    hsa_signal_t signal = entry->signal;
+
+    // Releasing completed entry
+    entry->valid.store(ENTRY_COMPL, std::memory_order_release);
+
+    assert(entry->handler != nullptr);
+    entry->handler(entry);
+
+    // Original intercepted signal completion
+    if (orig.handle) {
+      amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
+      amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(signal.handle);
+      orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
+      orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
+
+      [[maybe_unused]] const hsa_signal_value_t new_value =
+          saved_core_api.hsa_signal_load_relaxed_fn(orig) - 1;
+      assert(signal_value == new_value && "Tracker::Complete bad signal value");
+      saved_core_api.hsa_signal_store_screlease_fn(orig, signal_value);
+    }
+    saved_core_api.hsa_signal_destroy_fn(signal);
+    delete entry;
+  }
+
+  // Handler for packet completion. Always returns false.
+  static bool Handler(hsa_signal_value_t signal_value, void* arg) {
+    // Acquire entry
+    entry_t* entry = static_cast<entry_t*>(arg);
+    while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
+
+    // Complete entry
+    Tracker::Complete(signal_value, entry);
+    return false;
+  }
+};
+
 hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) {
   hsa_status_t status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr);
   if (status != HSA_STATUS_SUCCESS) return status;
diff --git a/projects/roctracer/src/roctracer/roctracer.cpp b/projects/roctracer/src/roctracer/roctracer.cpp
index 936c40c096..8d837e5479 100644
--- a/projects/roctracer/src/roctracer/roctracer.cpp
+++ b/projects/roctracer/src/roctracer/roctracer.cpp
@@ -42,7 +42,6 @@
 #include "loader.h"
 #include "hsa_support.h"
 #include "memory_pool.h"
-#include "tracker.h"
 #include "exception.h"
 #include "util/logger.h"
 
diff --git a/projects/roctracer/src/roctracer/tracker.h b/projects/roctracer/src/roctracer/tracker.h
deleted file mode 100644
index d651c28857..0000000000
--- a/projects/roctracer/src/roctracer/tracker.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE. */
-
-#ifndef SRC_CORE_TRACKER_H_
-#define SRC_CORE_TRACKER_H_
-
-#include
-#include
-#include
-#include
-
-#include
-
-#include "exception.h"
-#include "util/logger.h"
-
-namespace roctracer {
-class Tracker {
- public:
-  enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
-
-  enum entry_type_t {
-    DFLT_ENTRY_TYPE = 0,
-    API_ENTRY_TYPE = 1,
-    COPY_ENTRY_TYPE = 2,
-    KERNEL_ENTRY_TYPE = 3,
-    NUM_ENTRY_TYPE = 4
-  };
-
-  struct entry_t {
-    std::atomic valid;
-    entry_type_t type;
-    uint64_t correlation_id;
-    roctracer_timestamp_t begin;  // begin timestamp, ns
-    roctracer_timestamp_t end;    // end timestamp, ns
-    hsa_agent_t agent;
-    uint32_t dev_index;
-    hsa_signal_t orig;
-    hsa_signal_t signal;
-    void (*handler)(const entry_t*);
-    MemoryPool* pool;
-    union {
-      struct {
-      } copy;
-      struct {
-        const char* name;
-        hsa_agent_t agent;
-        uint32_t tid;
-      } kernel;
-    };
-  };
-
-  // Add tracker entry
-  inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
-                            entry_t* entry) {
-    hsa_status_t status = HSA_STATUS_ERROR;
-
-    // Creating a new tracker entry
-    entry->type = type;
-    entry->agent = agent;
-    entry->dev_index = 0;  // hsa_rsrc->GetAgentInfo(agent)->dev_index;
-    entry->orig = signal;
-    entry->valid.store(ENTRY_INIT, std::memory_order_release);
-
-    // Creating a proxy signal
-    status = hsa_signal_create(1, 0, NULL, &(entry->signal));
-    if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_signal_create failed");
-    status =
-        hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
-    if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_amd_signal_async_handler failed");
-  }
-
-  // Delete tracker entry
-  inline static void Disable(entry_t* entry) {
-    hsa_signal_destroy(entry->signal);
-    entry->valid.store(ENTRY_INV, std::memory_order_release);
-  }
-
- private:
-  // Entry completion
-  inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
-    static roctracer_timestamp_t sysclock_period = []() {
-      uint64_t sysclock_hz = 0;
-      hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
-      if (status != HSA_STATUS_SUCCESS) FATAL_LOGGING("hsa_system_get_info failed");
-      return (uint64_t)1000000000 / sysclock_hz;
-    }();
-
-    if (entry->type == COPY_ENTRY_TYPE) {
-      hsa_amd_profiling_async_copy_time_t async_copy_time{};
-      hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
-      if (status != HSA_STATUS_SUCCESS)
-        FATAL_LOGGING("hsa_amd_profiling_get_async_copy_time failed");
-      entry->begin = async_copy_time.start * sysclock_period;
-      entry->end = async_copy_time.end * sysclock_period;
-    } else {
-      assert(false && "should not reach here");
-    }
-
-    hsa_signal_t orig = entry->orig;
-    hsa_signal_t signal = entry->signal;
-
-    // Releasing completed entry
-    entry->valid.store(ENTRY_COMPL, std::memory_order_release);
-
-    assert(entry->handler != nullptr);
-    entry->handler(entry);
-
-    // Original intercepted signal completion
-    if (orig.handle) {
-      amd_signal_t* orig_signal_ptr = reinterpret_cast(orig.handle);
-      amd_signal_t* prof_signal_ptr = reinterpret_cast(signal.handle);
-      orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
-      orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
-
-      [[maybe_unused]] const hsa_signal_value_t new_value = hsa_signal_load_relaxed(orig) - 1;
-      assert(signal_value == new_value && "Tracker::Complete bad signal value");
-      hsa_signal_store_screlease(orig, signal_value);
-    }
-    hsa_signal_destroy(signal);
-    delete entry;
-  }
-
-  // Handler for packet completion
-  static bool Handler(hsa_signal_value_t signal_value, void* arg) {
-    // Acquire entry
-    entry_t* entry = reinterpret_cast(arg);
-    while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
-
-    // Complete entry
-    Tracker::Complete(signal_value, entry);
-    return false;
-  }
-};
-
-}  // namespace roctracer
-
-#endif  // SRC_CORE_TRACKER_H_