From 70c748bddafc0d6fa03a8308245f8b3c63af5483 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 6 Oct 2019 23:13:57 -0500 Subject: [PATCH 01/23] activity records stack [ROCm/roctracer commit: b5a9dc7dd6f1b97b06b2a01bd15420abb227ed93] --- projects/roctracer/inc/ext/prof_protocol.h | 2 +- projects/roctracer/src/core/roctracer.cpp | 58 ++++++++++++++++++++-- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/projects/roctracer/inc/ext/prof_protocol.h b/projects/roctracer/inc/ext/prof_protocol.h index d6e08ca0f3..ab6f83e335 100644 --- a/projects/roctracer/inc/ext/prof_protocol.h +++ b/projects/roctracer/inc/ext/prof_protocol.h @@ -83,7 +83,7 @@ struct activity_record_t { }; // Activity sync calback type -typedef activity_record_t* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); +typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); // Activity async calback type typedef void (*activity_id_callback_t)(activity_correlation_id_t id); typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index f9855e663e..2f3d7bb4a6 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -32,6 +32,7 @@ THE SOFTWARE. #include #include #include +#include #include #include #include @@ -385,6 +386,20 @@ class MemoryPool { pthread_cond_t read_cond_; }; +// Records storage +struct roctracer_api_data_t { + union { + hip_api_data_t hip; + }; + roctracer_api_data_t() {}; +}; +struct record_pair_t { + roctracer_record_t record; + roctracer_api_data_t data; + record_pair_t() {}; +}; +static thread_local std::stack record_pair_stack; + // Correlation id storage static thread_local activity_correlation_id_t correlation_id_tls = 0; typedef std::map correlation_id_map_t; @@ -407,7 +422,7 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre return it->second; } -roctracer_record_t* HIP_SyncActivityCallback( +void* HIP_SyncActivityCallback( uint32_t op_id, roctracer_record_t* record, const void* callback_data, @@ -416,12 +431,31 @@ roctracer_record_t* HIP_SyncActivityCallback( static hsa_rt_utils::Timer timer; const hip_api_data_t* data = reinterpret_cast(callback_data); + hip_api_data_t* data_ptr = const_cast(data); MemoryPool* pool = reinterpret_cast(arg); - if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL"); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { + + int phase = ACTIVITY_API_PHASE_ENTER; + if (data != NULL) { + phase = data->phase; + } else if (pool != NULL) { + phase = ACTIVITY_API_PHASE_EXIT; + } + + if (phase == ACTIVITY_API_PHASE_ENTER) { + if ((data == NULL) && (pool != NULL)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: pool is not NULL"); + // Allocating a record if NULL passed + if (record == NULL) { + record_pair_stack.push({}); + auto& top = record_pair_stack.top(); + record = &(top.record); + data_ptr = &(top.data.hip); + } + + // Filing record info record->domain = ACTIVITY_DOMAIN_HIP_API; record->op = op_id; record->begin_ns = timer.timestamp_ns(); + // Correlation ID generating uint64_t correlation_id = data->correlation_id; if (correlation_id == 0) { @@ -429,10 +463,23 @@ roctracer_record_t* HIP_SyncActivityCallback( const_cast(data)->correlation_id = correlation_id; } record->correlation_id = correlation_id; + // Passing correlatin ID correlation_id_tls = correlation_id; - return record; + + return data_ptr; } else { + if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: pool is NULL"); + + // Getting record of stacked + if (!record_pair_stack.empty()) { + auto& top = record_pair_stack.top(); + record = &(top.record); + data = &(top.data.hip); + record_pair_stack.pop(); + } + + // Filing record info record->end_ns = timer.timestamp_ns(); record->process_id = syscall(__NR_getpid); record->thread_id = syscall(__NR_gettid); @@ -446,9 +493,12 @@ roctracer_record_t* HIP_SyncActivityCallback( pool->Write(ext_record); } + // Writing record to the buffer pool->Write(*record); + // Clearing correlatin ID correlation_id_tls = 0; + return NULL; } } From 3f73d90d6164e9806870e313630dd48ac541ae4d Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 9 Oct 2019 01:00:05 -0500 Subject: [PATCH 02/23] start/stop API [ROCm/roctracer commit: 72b5494f0bc1259d1fc438682883dae438713aee] --- projects/roctracer/inc/roctracer_ext.h | 8 ++ projects/roctracer/src/core/journal.h | 102 +++++++++++++++ projects/roctracer/src/core/roctracer.cpp | 123 ++++++++++++++++-- .../MatrixTranspose_test/MatrixTranspose.cpp | 17 ++- 4 files changed, 229 insertions(+), 21 deletions(-) create mode 100644 projects/roctracer/src/core/journal.h diff --git a/projects/roctracer/inc/roctracer_ext.h b/projects/roctracer/inc/roctracer_ext.h index 6a1edb9af3..f91927962b 100644 --- a/projects/roctracer/inc/roctracer_ext.h +++ b/projects/roctracer/inc/roctracer_ext.h @@ -39,9 +39,17 @@ extern "C" { #endif // __cplusplus //////////////////////////////////////////////////////////////////////////////// +// Application annotatin API + // Mark API void roctracer_mark(const char* str); +// Tracing start API +void roctracer_start(); + +// Tracing stop API +void roctracer_stop(); + //////////////////////////////////////////////////////////////////////////////// // External correlation id API diff --git a/projects/roctracer/src/core/journal.h b/projects/roctracer/src/core/journal.h new file mode 100644 index 0000000000..f4d8a676b7 --- /dev/null +++ b/projects/roctracer/src/core/journal.h @@ -0,0 +1,102 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef SRC_CORE_JOURNAL_H_ +#define SRC_CORE_JOURNAL_H_ + +#include +#include + +namespace roctracer { + +template +class Journal { + public: + typedef std::mutex mutex_t; + typedef std::map domain_map_t; + typedef std::map journal_map_t; + + struct record_t { + uint32_t domain; + uint32_t op; + Data data; + }; + + Journal() { + domain_mask_ = 0; + map_ = new journal_map_t; + } + + ~Journal() { + for (auto& val : map_) delete val.second; + delete map_; + } + + void registr(const record_t& record) { + std::lock_guard lck(mutex_); + auto* map = get_domain_map(record.domain); + map->insert({record.op, record.data}); + } + + void remove(const record_t& record) { + std::lock_guard lck(mutex_); + auto* map = get_domain_map(record.domain); + map->erase(record.op); + } + + template + F foreach(const F& f_i) { + std::lock_guard lck(mutex_); + F f = f_i; + for (uint32_t domain = 0, mask = domain_mask_; mask != 0; ++domain, mask >>= 1) { + if (mask & 1) { + auto map = get_domain_map(domain); + auto begin = map->begin(); + auto end = map->end(); + for (auto it = begin; it != end; ++it) { + if (f.fun({domain, it->first, it->second}) == false) break; + } + } + } + return f; + } + + private: + domain_map_t* get_domain_map(const uint32_t& domain) { + domain_mask_ |= 1u << domain; + auto domain_it = map_->find(domain); + if (domain_it == map_->end()) { + auto* domain_map = new domain_map_t; + auto ret = map_->insert({domain, domain_map}); + domain_it = ret.first; + } + return domain_it->second; + } + + mutex_t mutex_; + journal_map_t* map_; + uint32_t domain_mask_; +}; + +} // namespace roctracer + +#endif // SRC_CORE_JOURNAL_H_ diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 2f3d7bb4a6..1f64ac859c 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -38,6 +38,7 @@ THE SOFTWARE. #include #include +#include "core/journal.h" #include "core/loader.h" #include "core/trace_buffer.h" #include "proxy/tracker.h" @@ -178,11 +179,52 @@ namespace roctracer { decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn; decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; +typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t; +typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t; +typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t; +typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t; + +struct cb_journal_data_t { + roctracer_rtapi_callback_t callback; + void* user_data; +}; +typedef Journal CbJournal; +CbJournal* cb_journal; + +struct act_journal_data_t { + roctracer_pool_t* pool; +}; +typedef Journal ActJournal; +ActJournal* act_journal; + +template +struct journal_functor_t { + typedef typename T::record_t record_t; + F f_; + journal_functor_t(F f) : f_(f) {} + bool fun(const record_t& record) { + f_((activity_domain_t)record.domain, record.op); + return true; + } +}; +typedef journal_functor_t cb_en_functor_t; +typedef journal_functor_t cb_dis_functor_t; +typedef journal_functor_t act_en_functor_t; +typedef journal_functor_t act_dis_functor_t; +template<> bool cb_en_functor_t::fun(const cb_en_functor_t::record_t& record) { + f_((activity_domain_t)record.domain, record.op, record.data.callback, record.data.user_data); + return true; +} +template<> bool act_en_functor_t::fun(const act_en_functor_t::record_t& record) { + f_((activity_domain_t)record.domain, record.op, record.data.pool); + return true; +} + void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry); void hsa_kernel_handler(::proxy::Tracker::entry_t* entry); TraceBuffer::flush_prm_t trace_buffer_prm[] = { - {roctracer::COPY_ENTRY_TYPE, hsa_async_copy_handler}, - {roctracer::KERNEL_ENTRY_TYPE, hsa_kernel_handler} + {COPY_ENTRY_TYPE, hsa_async_copy_handler}, + {KERNEL_ENTRY_TYPE, hsa_kernel_handler} }; TraceBuffer trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2); @@ -735,8 +777,8 @@ static inline uint32_t get_op_num(const uint32_t& domain) { } // Enable runtime API callbacks -static void roctracer_enable_callback_impl( - uint32_t domain, +static roctracer_status_t roctracer_enable_callback_fun( + roctracer_domain_t domain, uint32_t op, roctracer_rtapi_callback_t callback, void* user_data) @@ -768,6 +810,17 @@ static void roctracer_enable_callback_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_enable_callback_impl( + uint32_t domain, + uint32_t op, + roctracer_rtapi_callback_t callback, + void* user_data) +{ + roctracer::cb_journal->registr({domain, op, {callback, user_data}}); + roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); } PUBLIC_API roctracer_status_t roctracer_enable_op_callback( @@ -805,8 +858,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback( } // Disable runtime API callbacks -static void roctracer_disable_callback_impl( - uint32_t domain, +static roctracer_status_t roctracer_disable_callback_fun( + roctracer_domain_t domain, uint32_t op) { switch (domain) { @@ -833,6 +886,15 @@ static void roctracer_disable_callback_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_disable_callback_impl( + uint32_t domain, + uint32_t op) +{ + roctracer::cb_journal->remove({domain, op, {}}); + roctracer_disable_callback_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_callback( @@ -900,8 +962,8 @@ PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) { } // Enable activity records logging -static void roctracer_enable_activity_impl( - uint32_t domain, +static roctracer_status_t roctracer_enable_activity_fun( + roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) { @@ -933,6 +995,16 @@ static void roctracer_enable_activity_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_enable_activity_impl( + uint32_t domain, + uint32_t op, + roctracer_pool_t* pool) +{ + roctracer::act_journal->registr({domain, op, {pool}}); + roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool); } PUBLIC_API roctracer_status_t roctracer_enable_op_activity( @@ -967,8 +1039,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity( } // Disable activity records logging -static void roctracer_disable_activity_impl( - uint32_t domain, +static roctracer_status_t roctracer_disable_activity_fun( + roctracer_domain_t domain, uint32_t op) { switch (domain) { @@ -993,6 +1065,15 @@ static void roctracer_disable_activity_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_disable_activity_impl( + uint32_t domain, + uint32_t op) +{ + roctracer::act_journal->remove({domain, op, {}}); + roctracer_disable_activity_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_activity( @@ -1061,10 +1142,22 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act // Mark API PUBLIC_API void roctracer_mark(const char* str) { - if (mark_api_callback_ptr) { - mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL); - roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id - } + if (mark_api_callback_ptr) { + mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL); + roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id + } +} + +// Start API +PUBLIC_API void roctracer_start() { + roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); +} + +// Stop API +PUBLIC_API void roctracer_stop() { + roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); } // Set properties @@ -1163,6 +1256,8 @@ PUBLIC_API void OnUnload() { CONSTRUCTOR_API void constructor() { if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); } roctracer::util::Logger::Create(); + if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; + if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); } } diff --git a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp index 7ca24d0f0b..1a4978bab7 100644 --- a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -164,9 +164,9 @@ int main() { free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix); + } stop_tracing(); - } return errors; } @@ -290,23 +290,26 @@ void activity_callback(const char* begin, const char* end, void* arg) { // Init tracing routine void init_tracing() { - std::cout << "# START #############################" << std::endl << std::flush; + std::cout << "# INIT #############################" << std::endl << std::flush; // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 0x1000; properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); -} - -// Start tracing routine -void start_tracing() { - std::cout << "# START #############################" << std::endl << std::flush; // Enable HIP API callbacks ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL)); // Enable HIP activity tracing ROCTRACER_CALL(roctracer_enable_activity()); } +// Start tracing routine +void start_tracing() { + std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush; + // Start + if ((iterations & 1) == 1) roctracer_start(); + else roctracer_stop(); +} + // Stop tracing routine void stop_tracing() { ROCTRACER_CALL(roctracer_disable_callback()); From a88c2c7f93024f38287ed995adc9c7b374980a77 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 9 Oct 2019 10:56:19 -0500 Subject: [PATCH 03/23] tool tracing control integration [ROCm/roctracer commit: 9a5a603440974bb5aac1700169985e90573498f0] --- .../test/MatrixTranspose/MatrixTranspose.cpp | 6 + projects/roctracer/test/tool/tracer_tool.cpp | 105 ++++++++++++++---- 2 files changed, 92 insertions(+), 19 deletions(-) diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp index 240723cfe4..8a5ff54a90 100644 --- a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp @@ -82,6 +82,10 @@ int main() { hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + uint32_t iterations = 100; + while (iterations-- > 0) { + std::cout << "## Iteration (" << iterations << ") #################" << std::endl; + // Memory transfer from host to device hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); @@ -112,6 +116,8 @@ int main() { printf("PASSED!\n"); } + } + // free the resources on device side hipFree(gpuMatrix); hipFree(gpuTransposeMatrix); diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 15d24c9a82..556f233bbe 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -25,9 +25,14 @@ THE SOFTWARE. #include /* names denangle */ #include +#include #include -#include /* For SYS_xxx definitions */ +#include +#include /* SYS_xxx definitions */ +#include +#include /* usleep */ +#include #include #include #include @@ -90,24 +95,6 @@ void fatal(const std::string msg) { abort(); } -// KFD API callback function -void kfd_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ - (void)arg; - const kfd_api_data_t* data = reinterpret_cast(callback_data); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { - kfd_begin_timestamp = timer->timestamp_fn_ns(); - } else { - const timestamp_t end_timestamp = timer->timestamp_fn_ns(); - std::ostringstream os; - os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); - fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); - } -} // C++ symbol demangle static inline const char* cxx_demangle(const char* symbol) { size_t funcnamesize; @@ -116,6 +103,37 @@ static inline const char* cxx_demangle(const char* symbol) { return (ret != NULL) ? ret : symbol; } +// Tracing control thread +uint32_t control_delay_us = 0; +uint32_t control_len_us = 0; +uint32_t control_dist_us = 0; +void* control_thr_fun(void*) { + const uint32_t delay_sec = control_delay_us / 1000000; + const uint32_t delay_us = control_delay_us % 1000000; + const uint32_t len_sec = control_len_us / 1000000; + const uint32_t len_us = control_len_us % 1000000; + const uint32_t dist_sec = control_dist_us / 1000000; + const uint32_t dist_us = control_dist_us % 1000000; + bool start = true; + + sleep(delay_sec); + usleep(delay_us); + + while (1) { + if (start) { + start = false; + roctracer_start(); + sleep(len_sec); + usleep(len_us); + } else { + start = true; + roctracer_stop(); + sleep(dist_sec); + usleep(dist_us); + } + } +} + struct hsa_api_trace_entry_t { uint32_t valid; uint32_t type; @@ -328,6 +346,25 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { } } +// KFD API callback function +void kfd_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const kfd_api_data_t* data = reinterpret_cast(callback_data); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + kfd_begin_timestamp = timer->timestamp_fn_ns(); + } else { + const timestamp_t end_timestamp = timer->timestamp_fn_ns(); + std::ostringstream os; + os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); + fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); + } +} + // Input parser std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) { const std::string space_chars_set = " \t"; @@ -436,6 +473,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, std::vector kfd_api_vec; printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); + // XML input const char* xml_name = getenv("ROCP_INPUT"); if (xml_name != NULL) { @@ -556,6 +594,35 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); } + const char* ctrl_str = getenv("ROCP_CTRL_RATE"); + if (ctrl_str != NULL) { + uint32_t ctrl_delay = 0; + uint32_t ctrl_rate = 0; + uint32_t ctrl_len = 0; + int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_rate, &ctrl_len); + if (ret != 3) { + fprintf(stderr, "ROCTracer: control rate value invalid 'delay:rate:length': '%s'\n", ctrl_str); + abort(); + } + if (ctrl_len > ctrl_rate) { + fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate); + abort(); + } + control_dist_us = ctrl_rate - ctrl_len; + control_len_us = ctrl_len; + control_delay_us = ctrl_delay; + + fprintf(stdout, "ROCTracer: Trace control delay(%uus) rate(%uus), len(%uus)\n", ctrl_delay, ctrl_rate, ctrl_len); fflush(stdout); + + roctracer_stop(); + + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, control_thr_fun, NULL); + } + if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); } From 8e998410b7624df64e5c3d58169782c75396b859 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 9 Oct 2019 13:00:27 -0500 Subject: [PATCH 04/23] output cosmetic fix [ROCm/roctracer commit: 2a480667ae4410f8d8e4a168245b91e100457b9a] --- projects/roctracer/test/tool/tracer_tool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 556f233bbe..8ba68a3a12 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -612,7 +612,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, control_len_us = ctrl_len; control_delay_us = ctrl_delay; - fprintf(stdout, "ROCTracer: Trace control delay(%uus) rate(%uus), len(%uus)\n", ctrl_delay, ctrl_rate, ctrl_len); fflush(stdout); + fprintf(stdout, "ROCTracer: trace control delay(%uus), rate(%uus), len(%uus)\n", ctrl_delay, ctrl_rate, ctrl_len); fflush(stdout); roctracer_stop(); From d7591ee8aaf4ab4094ae7a983c2979d87a157eee Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 9 Oct 2019 23:32:01 -0500 Subject: [PATCH 05/23] roctx tool support integration [ROCm/roctracer commit: eb232e7e3cc80de93ce66db87954f6b5b544a743] --- projects/roctracer/test/run.sh | 1 + projects/roctracer/test/tool/tracer_tool.cpp | 145 +++++++++++++++---- 2 files changed, 116 insertions(+), 30 deletions(-) diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index 6840aacc63..9f5da3df9e 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -68,6 +68,7 @@ export ROCTRACER_DOMAIN="hip" # HIP test eval_test "tool HIP test" ./test/MatrixTranspose +eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:1000000:100000 ./test/MatrixTranspose" # HSA test export ROCTRACER_DOMAIN="hsa" diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 8ba68a3a12..0215a2b5f0 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -33,6 +33,7 @@ THE SOFTWARE. #include /* usleep */ #include +#include #include #include #include @@ -65,6 +66,7 @@ hsa_rt_utils::Timer* timer = NULL; thread_local timestamp_t hsa_begin_timestamp = 0; thread_local timestamp_t hip_begin_timestamp = 0; thread_local timestamp_t kfd_begin_timestamp = 0; +bool trace_roctx = false; bool trace_hsa_api = false; bool trace_hsa_activity = false; bool trace_hip = false; @@ -73,6 +75,7 @@ bool trace_kfd = false; LOADER_INSTANTIATE(); // Global output file handle +FILE* roctx_file_handle = NULL; FILE* hsa_api_file_handle = NULL; FILE* hsa_async_copy_file_handle = NULL; FILE* hip_api_file_handle = NULL; @@ -84,6 +87,7 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler void fatal(const std::string msg) { + fflush(roctx_file_handle); fflush(hsa_api_file_handle); fflush(hsa_async_copy_file_handle); fflush(hip_api_file_handle); @@ -134,6 +138,55 @@ void* control_thr_fun(void*) { } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// rocTX annotation tracing + +struct roctx_trace_entry_t { + uint32_t valid; + uint32_t type; + uint32_t cid; + timestamp_t timestamp; + uint32_t pid; + uint32_t tid; + const char* message; +}; + +void roctx_flush_cb(roctx_trace_entry_t* entry); +roctracer::TraceBuffer::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}}; +roctracer::TraceBuffer roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1); + +// rocTX callback function +void roctx_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const roctx_api_data_t* data = reinterpret_cast(callback_data); + const timestamp_t timestamp = timer->timestamp_fn_ns(); + roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); + const char* message = data->args.message; + entry->valid = roctracer::TRACE_ENTRY_COMPL; + entry->type = 0; + entry->cid = cid; + entry->timestamp = timestamp; + entry->pid = GetPid(); + entry->tid = GetTid(); + entry->message = (message != NULL) ? strdup(message) : NULL; +} + +void roctx_flush_cb(roctx_trace_entry_t* entry) { + std::ostringstream os; + os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; + if (entry->message != NULL) os << ":\"" << entry->message << "\""; + else os << ":\"\""; + fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HSA API tracing + struct hsa_api_trace_entry_t { uint32_t valid; uint32_t type; @@ -204,6 +257,9 @@ struct hip_api_trace_entry_t { void* ptr; }; +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HIP API tracing + void hip_api_flush_cb(hip_api_trace_entry_t* entry); roctracer::TraceBuffer::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}}; roctracer::TraceBuffer hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1); @@ -346,6 +402,9 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// KFD API tracing + // KFD API callback function void kfd_api_callback( uint32_t domain, @@ -365,6 +424,8 @@ void kfd_api_callback( } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// + // Input parser std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) { const std::string space_chars_set = " \t"; @@ -441,21 +502,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // API traces switches const char* trace_domain = getenv("ROCTRACER_DOMAIN"); if (trace_domain != NULL) { - if (strncmp(trace_domain, "hsa", 3) == 0) { + if (std::string(trace_domain).find("roctx") != std::string::npos) { + trace_roctx = true; + } + if (std::string(trace_domain).find("hsa") != std::string::npos) { trace_hsa_api = true; trace_hsa_activity = true; } - if (strncmp(trace_domain, "hip", 3) == 0) { + if (std::string(trace_domain).find("hip") != std::string::npos) { trace_hip = true; } - if (strncmp(trace_domain, "sys", 3) == 0) { + if (std::string(trace_domain).find("sys") != std::string::npos) { trace_hsa_api = true; trace_hip = true; } + if (std::string(trace_domain).find("kfd") != std::string::npos) { + trace_kfd = true; + } } - trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0); - // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); if (output_prefix != NULL) { @@ -496,16 +561,15 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, break; } + if (name == "rocTX") { + found = true; + trace_roctx = true; + } if (name == "HSA") { found = true; trace_hsa_api = true; hsa_api_vec = api_vec; } - if (name == "KFD") { - found = true; - trace_kfd = true; - kfd_api_vec = api_vec; - } if (name == "GPU") { found = true; trace_hsa_activity = true; @@ -514,13 +578,26 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, found = true; trace_hip = true; } + if (name == "KFD") { + found = true; + trace_kfd = true; + kfd_api_vec = api_vec; + } } if (found) printf("input from \"%s\"", xml_name); } printf("\n"); - // Enable HSA API callbacks + // Enable rpcTX callbacks + if (trace_roctx) { + roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt"); + + fprintf(stdout, " rocTX-trace()\n"); fflush(stdout); + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_callback, NULL)); + } + + // Enable HSA API callbacks/activity if (trace_hsa_api) { hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); @@ -542,25 +619,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } - if (trace_kfd) { - kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); - // initialize KFD tracing - roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL); - - printf(" KFD-trace("); - if (kfd_api_vec.size() != 0) { - for (unsigned i = 0; i < kfd_api_vec.size(); ++i) { - uint32_t cid = KFD_API_ID_NUMBER; - const char* api = kfd_api_vec[i].c_str(); - ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid)); - ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); - printf(" %s", api); - } - } else { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL)); - } - printf(")\n"); - } if (trace_hsa_activity) { hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); @@ -623,6 +681,27 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, err = pthread_create(&thread, &attr, control_thr_fun, NULL); } + // Enable KFD API callbacks/activity + if (trace_kfd) { + kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); + // initialize KFD tracing + roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL); + + printf(" KFD-trace("); + if (kfd_api_vec.size() != 0) { + for (unsigned i = 0; i < kfd_api_vec.size(); ++i) { + uint32_t cid = KFD_API_ID_NUMBER; + const char* api = kfd_api_vec[i].c_str(); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid)); + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); + printf(" %s", api); + } + } else { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL)); + } + printf(")\n"); + } + if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); } @@ -637,6 +716,12 @@ void tool_unload(bool destruct) { is_unloaded = true; roctracer_unload(destruct); + if (trace_roctx) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); + + roctx_trace_buffer.Flush(); + close_output_file(roctx_file_handle); + } if (trace_hsa_api) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); From 95af54bc171bff475ad2905816332a9e848eba0e Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 10 Oct 2019 19:02:40 -0500 Subject: [PATCH 06/23] changing trac control to [ROCm/roctracer commit: b10ec59e6c610425fd0a1f6f1f3405bb9d8ba91b] --- projects/roctracer/test/run.sh | 3 ++- projects/roctracer/test/tool/tracer_tool.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index 9f5da3df9e..e5170a1416 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -68,7 +68,8 @@ export ROCTRACER_DOMAIN="hip" # HIP test eval_test "tool HIP test" ./test/MatrixTranspose -eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:1000000:100000 ./test/MatrixTranspose" +# with trace sampling control +eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" # HSA test export ROCTRACER_DOMAIN="hsa" diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 0215a2b5f0..bdf1f25321 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -655,11 +655,11 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, const char* ctrl_str = getenv("ROCP_CTRL_RATE"); if (ctrl_str != NULL) { uint32_t ctrl_delay = 0; - uint32_t ctrl_rate = 0; uint32_t ctrl_len = 0; - int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_rate, &ctrl_len); + uint32_t ctrl_rate = 0; + int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); if (ret != 3) { - fprintf(stderr, "ROCTracer: control rate value invalid 'delay:rate:length': '%s'\n", ctrl_str); + fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str); abort(); } if (ctrl_len > ctrl_rate) { @@ -670,7 +670,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, control_len_us = ctrl_len; control_delay_us = ctrl_delay; - fprintf(stdout, "ROCTracer: trace control delay(%uus), rate(%uus), len(%uus)\n", ctrl_delay, ctrl_rate, ctrl_len); fflush(stdout); + fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); roctracer_stop(); From 62c9b259241f60b65f2732eb79e1fe4becff5202 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 15 Oct 2019 11:15:48 -0500 Subject: [PATCH 07/23] conditional building of kfd wrapper [ROCm/roctracer commit: b2c23f63d5fbf8684ec035d5519817884884eef2] --- projects/roctracer/inc/roctracer_ext.h | 3 --- projects/roctracer/src/CMakeLists.txt | 21 ++++++++++++------- .../test/MatrixTranspose/MatrixTranspose.cpp | 4 ++++ .../MatrixTranspose_test/MatrixTranspose.cpp | 10 +++++---- projects/roctracer/test/run.sh | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/projects/roctracer/inc/roctracer_ext.h b/projects/roctracer/inc/roctracer_ext.h index f91927962b..d555055d0b 100644 --- a/projects/roctracer/inc/roctracer_ext.h +++ b/projects/roctracer/inc/roctracer_ext.h @@ -41,9 +41,6 @@ extern "C" { //////////////////////////////////////////////////////////////////////////////// // Application annotatin API -// Mark API -void roctracer_mark(const char* str); - // Tracing start API void roctracer_start(); diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index bf0fe6c06a..4966827737 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -12,16 +12,21 @@ set ( LIB_SRC add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) + +# Generating HSA tracing primitives execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" ) -set ( KFD_LIB "kfdwrapper64" ) -set ( KFD_LIB_SRC - ${LIB_DIR}/kfd/kfd_wrapper.cpp -) -add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) -target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) -target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) -execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) +# Generating KFD/Thunk tracing primitives +if ( DEFINED KFD_WRAPPER ) + set ( KFD_LIB "kfdwrapper64" ) + set ( KFD_LIB_SRC + ${LIB_DIR}/kfd/kfd_wrapper.cpp + ) + add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) + target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) + target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) +endif() set ( ROCTX_LIB "roctx64" ) set ( ROCTX_LIB_SRC diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp index 8a5ff54a90..d2ecfb8484 100644 --- a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp @@ -36,6 +36,10 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Y 4 #define THREADS_PER_BLOCK_Z 1 +// Mark API +extern "C" +void roctracer_mark(const char* str); + // Device (Kernel) function, it must be void __global__ void matrixTranspose(float* out, float* in, const int width) { int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; diff --git a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp index 1a4978bab7..c2c50ebc41 100644 --- a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -297,9 +297,10 @@ void init_tracing() { properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); // Enable HIP API callbacks - ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL)); // Enable HIP activity tracing - ROCTRACER_CALL(roctracer_enable_activity()); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); } // Start tracing routine @@ -312,8 +313,9 @@ void start_tracing() { // Stop tracing routine void stop_tracing() { - ROCTRACER_CALL(roctracer_disable_callback()); - ROCTRACER_CALL(roctracer_disable_activity()); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); ROCTRACER_CALL(roctracer_flush_activity()); std::cout << "# STOP #############################" << std::endl << std::flush; } diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index e5170a1416..3b383da201 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -59,7 +59,7 @@ eval_test() { # Standalone test # rocTrecer is used explicitely by test -eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" +eval_test "standalone HIP test" "./test/MatrixTranspose_test" # Tool test # rocTracer/tool is loaded by HSA runtime From c0a2a235d067599aed3f716fa468877249819301 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 16 Oct 2019 12:18:13 -0500 Subject: [PATCH 08/23] KFD_WRAPPER macro [ROCm/roctracer commit: c2cb1f2ab99e35721e4eb1ca51bb85adee3d1d2d] --- projects/roctracer/cmake_modules/env.cmake | 5 +++++ projects/roctracer/src/core/roctracer.cpp | 14 ++++++++++++++ projects/roctracer/test/tool/tracer_tool.cpp | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/projects/roctracer/cmake_modules/env.cmake b/projects/roctracer/cmake_modules/env.cmake index fbeccf5d09..5627df083f 100644 --- a/projects/roctracer/cmake_modules/env.cmake +++ b/projects/roctracer/cmake_modules/env.cmake @@ -43,6 +43,11 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) add_definitions ( -DDEBUG_TRACE=1 ) endif() +## Enable KFD wrapper +if ( DEFINED KFD_WRAPPER ) + add_definitions ( -DKFD_WRAPPER=1 ) +endif() + ## Enable HIP/HCC local build if ( DEFINED LOCAL_BUILD ) add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} ) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 1f64ac859c..1085696a22 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -26,7 +26,9 @@ THE SOFTWARE. #include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" +#ifdef KFD_WRAPPER #include "inc/roctracer_kfd.h" +#endif #include #include @@ -726,10 +728,12 @@ PUBLIC_API const char* roctracer_op_string( return roctracer::HipLoader::Instance().ApiName(op); break; } +#if KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { return roctracer::kfd_support::GetApiName(op); break; } +#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -750,11 +754,13 @@ PUBLIC_API roctracer_status_t roctracer_op_code( if (kind != NULL) *kind = 0; break; } +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { *op = roctracer::kfd_support::GetApiCode(str); if (kind != NULL) *kind = 0; break; } +#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")"); } @@ -767,7 +773,9 @@ static inline uint32_t get_op_num(const uint32_t& domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; +#endif case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: @@ -784,11 +792,13 @@ static roctracer_status_t roctracer_enable_callback_fun( void* user_data) { switch (domain) { +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); break; } +#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { roctracer::hsa_support::cb_table.set(op, callback, user_data); @@ -863,11 +873,13 @@ static roctracer_status_t roctracer_disable_callback_fun( uint32_t op) { switch (domain) { +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error"); break; } +#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; @@ -1191,10 +1203,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( break; } +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { roctracer::kfd_support::intercept_KFDApiTable(); break; } +#endif case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index bdf1f25321..9e7dab49b0 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -37,7 +37,9 @@ THE SOFTWARE. #include #include #include +#ifdef KFD_WRAPPER #include +#endif #include #include #include @@ -406,6 +408,7 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { // KFD API tracing // KFD API callback function +#ifdef KFD_WRAPPER void kfd_api_callback( uint32_t domain, uint32_t cid, @@ -423,6 +426,7 @@ void kfd_api_callback( fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); } } +#endif /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -681,6 +685,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, err = pthread_create(&thread, &attr, control_thr_fun, NULL); } +#ifdef KFD_WRAPPER // Enable KFD API callbacks/activity if (trace_kfd) { kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); @@ -701,6 +706,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } printf(")\n"); } +#endif if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); From 9e2512a9c5f48fb75d4c00b38135ccc060a27afd Mon Sep 17 00:00:00 2001 From: Xiaozhu Meng Date: Fri, 18 Oct 2019 14:24:00 -0500 Subject: [PATCH 09/23] Install libkfdwrapper64.so [ROCm/roctracer commit: 017e792506689473690eaf3be64d82e218dae2ea] --- projects/roctracer/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/roctracer/CMakeLists.txt b/projects/roctracer/CMakeLists.txt index 04136e0081..f14ba0b9e4 100644 --- a/projects/roctracer/CMakeLists.txt +++ b/projects/roctracer/CMakeLists.txt @@ -109,6 +109,9 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) +## kfdwrapper +install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) + ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" ) From 05a07a9e7f73660e7cb232d9f5169b01d75c112b Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 21 Oct 2019 21:18:54 -0500 Subject: [PATCH 10/23] roctx/start/stop interaction [ROCm/roctracer commit: 57051220a11b9a22126c5d714fecae8416a2d228] --- projects/roctracer/inc/ext/prof_protocol.h | 1 + projects/roctracer/inc/roctracer_ext.h | 7 + projects/roctracer/inc/roctracer_roctx.h | 8 + projects/roctracer/src/core/loader.h | 8 +- projects/roctracer/src/core/memory_pool.h | 210 +++++++++++++++++++ projects/roctracer/src/core/roctracer.cpp | 202 ++---------------- projects/roctracer/src/roctx/roctx.cpp | 56 +++-- projects/roctracer/test/tool/tracer_tool.cpp | 60 ++++-- 8 files changed, 345 insertions(+), 207 deletions(-) create mode 100644 projects/roctracer/src/core/memory_pool.h diff --git a/projects/roctracer/inc/ext/prof_protocol.h b/projects/roctracer/inc/ext/prof_protocol.h index ab6f83e335..6d9cd62714 100644 --- a/projects/roctracer/inc/ext/prof_protocol.h +++ b/projects/roctracer/inc/ext/prof_protocol.h @@ -31,6 +31,7 @@ typedef enum { ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain diff --git a/projects/roctracer/inc/roctracer_ext.h b/projects/roctracer/inc/roctracer_ext.h index d555055d0b..c2f5c54542 100644 --- a/projects/roctracer/inc/roctracer_ext.h +++ b/projects/roctracer/inc/roctracer_ext.h @@ -34,6 +34,13 @@ THE SOFTWARE. #include "roctracer.h" +typedef void (*roctracer_start_cb_t)(); +typedef void (*roctracer_stop_cb_t)(); +typedef struct { + roctracer_start_cb_t start_cb; + roctracer_stop_cb_t stop_cb; +} roctracer_ext_properties_t; + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/projects/roctracer/inc/roctracer_roctx.h b/projects/roctracer/inc/roctracer_roctx.h index aaa95703db..329e974d61 100644 --- a/projects/roctracer/inc/roctracer_roctx.h +++ b/projects/roctracer/inc/roctracer_roctx.h @@ -77,6 +77,14 @@ bool RegisterApiCallback(uint32_t op, void* callback, void* arg); // Remove ROCTX callback for given opertaion id bool RemoveApiCallback(uint32_t op); +// Iterate range stack to support tracing start/stop +typedef struct { + const char* message; + uint32_t tid; +} roctx_range_data_t; +typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg); +void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg); + #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index dfddb11a20..2da49aa8a7 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -141,20 +141,24 @@ class KfdApi { }; // rocTX runtime library loader class +#include "inc/roctracer_roctx.h" class RocTxApi { public: typedef BaseLoader Loader; - typedef bool (RegisterApiCallback_t)(uint32_t op, void* callback, void* arg); - typedef bool (RemoveApiCallback_t)(uint32_t op); + typedef decltype(RegisterApiCallback) RegisterApiCallback_t; + typedef decltype(RemoveApiCallback) RemoveApiCallback_t; + typedef decltype(RangeStackIterate) RangeStackIterate_t; RegisterApiCallback_t* RegisterApiCallback; RemoveApiCallback_t* RemoveApiCallback; + RangeStackIterate_t* RangeStackIterate; protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("RegisterApiCallback"); RemoveApiCallback = loader->GetFun("RemoveApiCallback"); + RangeStackIterate = loader->GetFun("RangeStackIterate"); } }; diff --git a/projects/roctracer/src/core/memory_pool.h b/projects/roctracer/src/core/memory_pool.h new file mode 100644 index 0000000000..fe2f1a6d3a --- /dev/null +++ b/projects/roctracer/src/core/memory_pool.h @@ -0,0 +1,210 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef MEMORY_POOL_H_ +#define MEMORY_POOL_H_ + +#include +#include + +#include +#include + +#include "util/exception.h" + +#define PTHREAD_CALL(call) \ + do { \ + int err = call; \ + if (err != 0) { \ + errno = err; \ + perror(#call); \ + abort(); \ + } \ + } while (0) + +namespace roctracer { + +class MemoryPool { + public: + typedef std::mutex mutex_t; + + static void allocator_default(char** ptr, size_t size, void* arg) { + (void)arg; + if (*ptr == NULL) { + *ptr = reinterpret_cast(malloc(size)); + } else if (size != 0) { + *ptr = reinterpret_cast(realloc(ptr, size)); + } else { + free(*ptr); + *ptr = NULL; + } + } + + MemoryPool(const roctracer_properties_t& properties) { + // Assigning pool allocator + alloc_fun_ = allocator_default; + alloc_arg_ = NULL; + if (properties.alloc_fun != NULL) { + alloc_fun_ = properties.alloc_fun; + alloc_arg_ = properties.alloc_arg; + } + + // Pool definition + buffer_size_ = properties.buffer_size; + const size_t pool_size = 2 * buffer_size_; + pool_begin_ = NULL; + alloc_fun_(&pool_begin_, pool_size, alloc_arg_); + if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed"); + pool_end_ = pool_begin_ + pool_size; + buffer_begin_ = pool_begin_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + + // Consuming read thread + read_callback_fun_ = properties.buffer_callback_fun; + read_callback_arg_ = properties.buffer_callback_arg; + consumer_arg_.set(this, NULL, NULL, true); + PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL)); + PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL)); + PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_)); + } + + ~MemoryPool() { + Flush(); + PTHREAD_CALL(pthread_cancel(consumer_thread_)); + void *res; + PTHREAD_CALL(pthread_join(consumer_thread_, &res)); + if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); + allocator_default(&pool_begin_, 0, alloc_arg_); + } + + template + void Write(const Record& record) { + std::lock_guard lock(write_mutex_); + getRecord(record); + } + + void Flush() { + std::lock_guard lock(write_mutex_); + if (write_ptr_ > buffer_begin_) { + spawn_reader(buffer_begin_, write_ptr_); + sync_reader(&consumer_arg_); + buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + } + } + + private: + struct consumer_arg_t { + MemoryPool* obj; + const char* begin; + const char* end; + volatile std::atomic valid; + void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) { + obj = obj_p; + begin = begin_p; + end = end_p; + valid.store(valid_p); + } + }; + + template + Record* getRecord(const Record& init) { + char* next = write_ptr_ + sizeof(Record); + if (next > buffer_end_) { + if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); + spawn_reader(buffer_begin_, write_ptr_); + buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + next = write_ptr_ + sizeof(Record); + } + + Record* ptr = reinterpret_cast(write_ptr_); + write_ptr_ = next; + + *ptr = init; + return ptr; + } + + static void reset_reader(consumer_arg_t* arg) { + arg->valid.store(false); + } + + static void sync_reader(const consumer_arg_t* arg) { + while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield()); + } + + static void* reader_fun(void* consumer_arg) { + consumer_arg_t* arg = reinterpret_cast(consumer_arg); + roctracer::MemoryPool* obj = arg->obj; + + reset_reader(arg); + + while (1) { + PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_))); + while (arg->valid.load() == false) { + PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_))); + } + + obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_); + reset_reader(arg); + PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_))); + } + + return NULL; + } + + void spawn_reader(const char* data_begin, const char* data_end) { + sync_reader(&consumer_arg_); + PTHREAD_CALL(pthread_mutex_lock(&read_mutex_)); + consumer_arg_.set(this, data_begin, data_end, true); + PTHREAD_CALL(pthread_cond_signal(&read_cond_)); + PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_)); + } + + // pool allocator + roctracer_allocator_t alloc_fun_; + void* alloc_arg_; + + // Pool definition + size_t buffer_size_; + char* pool_begin_; + char* pool_end_; + char* buffer_begin_; + char* buffer_end_; + char* write_ptr_; + mutex_t write_mutex_; + + // Consuming read thread + roctracer_buffer_callback_t read_callback_fun_; + void* read_callback_arg_; + consumer_arg_t consumer_arg_; + pthread_t consumer_thread_; + pthread_mutex_t read_mutex_; + pthread_cond_t read_cond_; +}; + +} // namespace roctracer + +#endif // MEMORY_POOL_H_ diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 1085696a22..55428a2aba 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -23,6 +23,7 @@ THE SOFTWARE. #include "inc/roctracer.h" #include "inc/roctracer_hcc.h" #include "inc/roctracer_hip.h" +#include "inc/roctracer_ext.h" #include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" @@ -30,18 +31,19 @@ THE SOFTWARE. #include "inc/roctracer_kfd.h" #endif +#include +#include +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include #include "core/journal.h" #include "core/loader.h" +#include "core/memory_pool.h" #include "core/trace_buffer.h" #include "proxy/tracker.h" #include "ext/hsa_rt_utils.hpp" @@ -58,16 +60,6 @@ THE SOFTWARE. #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) -#define PTHREAD_CALL(call) \ - do { \ - int err = call; \ - if (err != 0) { \ - errno = err; \ - perror(#call); \ - abort(); \ - } \ - } while (0) - #define HIPAPI_CALL(call) \ do { \ hipError_t err = call; \ @@ -244,7 +236,12 @@ CoreApiTable CoreApiTable_saved{}; AmdExtTable AmdExtTable_saved{}; // Table of function pointers to HSA Image Extension ImageExtTable ImageExtTable_saved{}; -} +} // namespace hsa_support + +namespace ext_support { +roctracer_start_cb_t roctracer_start_cb = NULL; +roctracer_stop_cb_t roctracer_stop_cb = NULL; +} // namespace ext_suppoprt roctracer_status_t GetExcStatus(const std::exception& e) { const util::exception* roctracer_exc_ptr = dynamic_cast(&e); @@ -268,168 +265,6 @@ class GlobalCounter { GlobalCounter::mutex_t GlobalCounter::mutex_; GlobalCounter::counter_t GlobalCounter::counter_ = 0; -class MemoryPool { - public: - typedef std::mutex mutex_t; - - static void allocator_default(char** ptr, size_t size, void* arg) { - (void)arg; - if (*ptr == NULL) { - *ptr = reinterpret_cast(malloc(size)); - } else if (size != 0) { - *ptr = reinterpret_cast(realloc(ptr, size)); - } else { - free(*ptr); - *ptr = NULL; - } - } - - MemoryPool(const roctracer_properties_t& properties) { - // Assigning pool allocator - alloc_fun_ = allocator_default; - alloc_arg_ = NULL; - if (properties.alloc_fun != NULL) { - alloc_fun_ = properties.alloc_fun; - alloc_arg_ = properties.alloc_arg; - } - - // Pool definition - buffer_size_ = properties.buffer_size; - const size_t pool_size = 2 * buffer_size_; - pool_begin_ = NULL; - alloc_fun_(&pool_begin_, pool_size, alloc_arg_); - if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed"); - pool_end_ = pool_begin_ + pool_size; - buffer_begin_ = pool_begin_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - - // Consuming read thread - read_callback_fun_ = properties.buffer_callback_fun; - read_callback_arg_ = properties.buffer_callback_arg; - consumer_arg_.set(this, NULL, NULL, true); - PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL)); - PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL)); - PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_)); - } - - ~MemoryPool() { - Flush(); - PTHREAD_CALL(pthread_cancel(consumer_thread_)); - void *res; - PTHREAD_CALL(pthread_join(consumer_thread_, &res)); - if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); - allocator_default(&pool_begin_, 0, alloc_arg_); - } - - template - void Write(const Record& record) { - std::lock_guard lock(write_mutex_); - getRecord(record); - } - - void Flush() { - std::lock_guard lock(write_mutex_); - if (write_ptr_ > buffer_begin_) { - spawn_reader(buffer_begin_, write_ptr_); - sync_reader(&consumer_arg_); - buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - } - } - - private: - struct consumer_arg_t { - MemoryPool* obj; - const char* begin; - const char* end; - volatile std::atomic valid; - void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) { - obj = obj_p; - begin = begin_p; - end = end_p; - valid.store(valid_p); - } - }; - - template - Record* getRecord(const Record& init) { - char* next = write_ptr_ + sizeof(Record); - if (next > buffer_end_) { - if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); - spawn_reader(buffer_begin_, write_ptr_); - buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - next = write_ptr_ + sizeof(Record); - } - - Record* ptr = reinterpret_cast(write_ptr_); - write_ptr_ = next; - - *ptr = init; - return ptr; - } - - static void reset_reader(consumer_arg_t* arg) { - arg->valid.store(false); - } - - static void sync_reader(const consumer_arg_t* arg) { - while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield()); - } - - static void* reader_fun(void* consumer_arg) { - consumer_arg_t* arg = reinterpret_cast(consumer_arg); - roctracer::MemoryPool* obj = arg->obj; - - reset_reader(arg); - - while (1) { - PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_))); - while (arg->valid.load() == false) { - PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_))); - } - - obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_); - reset_reader(arg); - PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_))); - } - - return NULL; - } - - void spawn_reader(const char* data_begin, const char* data_end) { - sync_reader(&consumer_arg_); - PTHREAD_CALL(pthread_mutex_lock(&read_mutex_)); - consumer_arg_.set(this, data_begin, data_end, true); - PTHREAD_CALL(pthread_cond_signal(&read_cond_)); - PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_)); - } - - // pool allocator - roctracer_allocator_t alloc_fun_; - void* alloc_arg_; - - // Pool definition - size_t buffer_size_; - char* pool_begin_; - char* pool_end_; - char* buffer_begin_; - char* buffer_end_; - char* write_ptr_; - mutex_t write_mutex_; - - // Consuming read thread - roctracer_buffer_callback_t read_callback_fun_; - void* read_callback_arg_; - consumer_arg_t consumer_arg_; - pthread_t consumer_thread_; - pthread_mutex_t read_mutex_; - pthread_cond_t read_cond_; -}; - // Records storage struct roctracer_api_data_t { union { @@ -1162,6 +997,7 @@ PUBLIC_API void roctracer_mark(const char* str) { // Start API PUBLIC_API void roctracer_start() { + if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); } @@ -1170,6 +1006,7 @@ PUBLIC_API void roctracer_start() { PUBLIC_API void roctracer_stop() { roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); + if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); } // Set properties @@ -1222,6 +1059,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB"); if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name); mark_api_callback_ptr = reinterpret_cast(properties); + break; + } + case ACTIVITY_DOMAIN_EXT_API: { + roctracer_ext_properties_t* ops_properties = reinterpret_cast(properties); + roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb; + roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb; + break; } default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); diff --git a/projects/roctracer/src/roctx/roctx.cpp b/projects/roctracer/src/roctx/roctx.cpp index 61916f146a..fcf379f0fe 100644 --- a/projects/roctracer/src/roctx/roctx.cpp +++ b/projects/roctracer/src/roctx/roctx.cpp @@ -24,11 +24,13 @@ THE SOFTWARE. #include "inc/roctracer_roctx.h" #include +#include +#include +#include #include "inc/ext/prof_protocol.h" #include "util/exception.h" #include "util/logger.h" -#include #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) @@ -62,12 +64,8 @@ THE SOFTWARE. (void)err; \ return X; -static thread_local std::stack message_stack; - -#if 0 -static inline uint32_t GetPid() { return syscall(__NR_getpid); } -static inline uint32_t GetTid() { return syscall(__NR_gettid); } -#endif +inline uint32_t GetPid() { return syscall(__NR_getpid); } +inline uint32_t GetTid() { return syscall(__NR_gettid); } //////////////////////////////////////////////////////////////////////////////// // Library errors enumaration @@ -80,12 +78,27 @@ typedef enum { // Library implementation // namespace roctx { +typedef std::stack message_stack_t; +typedef std::map thread_map_t; +typedef std::mutex map_mutex_t; +map_mutex_t map_mutex; +thread_map_t* thread_map = NULL; +static thread_local message_stack_t* message_stack = NULL; roctx_status_t GetExcStatus(const std::exception& e) { const roctracer::util::exception* roctx_exc_ptr = dynamic_cast(&e); return (roctx_exc_ptr) ? static_cast(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR; } +void thread_data_init() { + message_stack = new message_stack_t; + const auto tid = GetTid(); + + std::lock_guard lck(map_mutex); + if (thread_map == NULL) thread_map = new thread_map_t; + (*thread_map)[tid] = message_stack; +} + // callbacks table extern cb_table_t cb_table; } // namespace roctx @@ -119,31 +132,50 @@ PUBLIC_API void roctxMarkA(const char* message) { PUBLIC_API int roctxRangePushA(const char* message) { API_METHOD_PREFIX + if (roctx::message_stack == NULL) roctx::thread_data_init(); + roctx_api_data_t api_data{}; api_data.args.roctxRangePushA.message = strdup(message); activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg); if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg); - message_stack.push(strdup(message)); + roctx::message_stack->push(strdup(message)); + + return roctx::message_stack->size() - 1; API_METHOD_CATCH(-1); - return message_stack.size()-1; } PUBLIC_API int roctxRangePop() { API_METHOD_PREFIX + if (roctx::message_stack == NULL) roctx::thread_data_init(); + roctx_api_data_t api_data{}; activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg); if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg); - if (message_stack.empty()) { + if (roctx::message_stack->empty()) { EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!"); } else { - message_stack.pop(); + roctx::message_stack->pop(); } + + return roctx::message_stack->size(); API_METHOD_CATCH(-1) - return message_stack.size(); +} + +PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) { + for (const auto& entry : *roctx::thread_map) { + const auto tid = entry.first; + for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){ + std::string message = stack.top(); + roctx_range_data_t data{}; + data.message = message.c_str(); + data.tid = tid; + callback(&data, arg); + } + } } } // extern "C" diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 9e7dab49b0..f21b0c7f7e 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -158,7 +158,24 @@ roctracer::TraceBuffer::flush_prm_t roctx_flush_prm[1] = {{ roctracer::TraceBuffer roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1); // rocTX callback function -void roctx_callback( +static inline void roctx_callback_fun( + uint32_t domain, + uint32_t cid, + uint32_t tid, + const char* message) +{ + const timestamp_t timestamp = timer->timestamp_fn_ns(); + roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); + entry->valid = roctracer::TRACE_ENTRY_COMPL; + entry->type = 0; + entry->cid = cid; + entry->timestamp = timestamp; + entry->pid = GetPid(); + entry->tid = tid; + entry->message = (message != NULL) ? strdup(message) : NULL; +} + +void roctx_api_callback( uint32_t domain, uint32_t cid, const void* callback_data, @@ -166,16 +183,23 @@ void roctx_callback( { (void)arg; const roctx_api_data_t* data = reinterpret_cast(callback_data); - const timestamp_t timestamp = timer->timestamp_fn_ns(); - roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); - const char* message = data->args.message; - entry->valid = roctracer::TRACE_ENTRY_COMPL; - entry->type = 0; - entry->cid = cid; - entry->timestamp = timestamp; - entry->pid = GetPid(); - entry->tid = GetTid(); - entry->message = (message != NULL) ? strdup(message) : NULL; + roctx_callback_fun(domain, cid, GetTid(), data->args.message); +} + +// Start/Stop callbacks +void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) { + const bool* is_stop_ptr = (bool*)arg; + const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA; + const char* message = (*is_stop_ptr == true) ? NULL : data->message; + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message); +} +void stop_callback() { + bool is_stop = true; + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); +} +void start_callback() { + bool is_stop = false; + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); } void roctx_flush_cb(roctx_trace_entry_t* entry) { @@ -597,8 +621,15 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, if (trace_roctx) { roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt"); + // initialize HSA tracing + roctracer_ext_properties_t properties { + start_callback, + stop_callback + }; + roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties); + fprintf(stdout, " rocTX-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); } // Enable HSA API callbacks/activity @@ -627,11 +658,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties{ + roctracer::hsa_ops_properties_t ops_properties { table, reinterpret_cast(hsa_activity_callback), NULL, - output_prefix}; + output_prefix + }; roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); From 693111434bc649d22241a82dc55ad906b60c10dd Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 21 Oct 2019 23:03:20 -0500 Subject: [PATCH 11/23] hip-vdi support [ROCm/roctracer commit: 72a2571b221bca9dc0860a1167d17d0bb94dba49] --- projects/roctracer/cmake_modules/env.cmake | 11 +++++++- projects/roctracer/inc/roctracer_hcc.h | 15 +++++++++++ projects/roctracer/src/core/loader.h | 26 +++++++++---------- projects/roctracer/src/core/roctracer.cpp | 4 +-- projects/roctracer/test/CMakeLists.txt | 2 +- .../test/MatrixTranspose_test/Makefile | 3 ++- .../MatrixTranspose_test/MatrixTranspose.cpp | 4 +-- 7 files changed, 45 insertions(+), 20 deletions(-) diff --git a/projects/roctracer/cmake_modules/env.cmake b/projects/roctracer/cmake_modules/env.cmake index 5627df083f..727cd0839e 100644 --- a/projects/roctracer/cmake_modules/env.cmake +++ b/projects/roctracer/cmake_modules/env.cmake @@ -43,9 +43,16 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) add_definitions ( -DDEBUG_TRACE=1 ) endif() +## Enable KFD wrapper +if ( DEFINED HIP_VDI ) + add_definitions ( -DHIP_VDI=${HIP_VDI} ) +else() + set ( HIP_VDI 0 ) +endif() + ## Enable KFD wrapper if ( DEFINED KFD_WRAPPER ) - add_definitions ( -DKFD_WRAPPER=1 ) + add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} ) endif() ## Enable HIP/HCC local build @@ -128,5 +135,7 @@ message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" ) message ( "-------------HCC-Inc: ${HCC_INC_DIR}" ) message ( "-------------HIP-Inc: ${HIP_INC_DIR}" ) message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" ) +message ( "-------------HIP-VDI: ${HIP_VDI}" ) +message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" ) message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" ) message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" ) diff --git a/projects/roctracer/inc/roctracer_hcc.h b/projects/roctracer/inc/roctracer_hcc.h index 7caee7c9ad..252b984d2a 100644 --- a/projects/roctracer/inc/roctracer_hcc.h +++ b/projects/roctracer/inc/roctracer_hcc.h @@ -23,11 +23,26 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_HCC_H_ #define INC_ROCTRACER_HCC_H_ +#if HIP_VDI +#define HIP_OP_ID_NUMBER 3 +#define HIP_OP_ID_COPY 1 +extern "C" { +typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); +typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); +typedef const char* (hipGetOpName_t)(unsigned op); +} +#else // !HIP_VDI #if LOCAL_BUILD #include #else #include #endif +#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER +#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY +typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t; +typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t; +typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t; +#endif // !HIP_VDI #include "roctracer.h" diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index 2da49aa8a7..1c4005dd3b 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -99,26 +99,26 @@ class HipApi { }; // HCC runtime library loader class +#include "inc/roctracer_hcc.h" class HccApi { public: typedef BaseLoader Loader; - typedef decltype(Kalmar::CLAMP::InitActivityCallback) InitActivityCallback_t; - typedef decltype(Kalmar::CLAMP::EnableActivityCallback) EnableActivityCallback_t; - typedef decltype(Kalmar::CLAMP::GetCmdName) GetCmdName_t; - - InitActivityCallback_t* InitActivityCallback; - EnableActivityCallback_t* EnableActivityCallback; - GetCmdName_t* GetCmdName; + hipInitAsyncActivityCallback_t* InitActivityCallback; + hipEnableAsyncActivityCallback_t* EnableActivityCallback; + hipGetOpName_t* GetOpName; protected: void init(Loader* loader) { - // Kalmar::CLAMP::InitActivityCallback - InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); - // Kalmar::CLAMP::EnableActivityIdCallback - EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); - // Kalmar::CLAMP::GetCmdName - GetCmdName = loader->GetFun("GetCmdNameImpl"); +#if HIP_VDI + InitActivityCallback = loader->GetFun("InitActivityCallback"); + EnableActivityCallback = loader->GetFun("EnableActivityCallback"); + GetOpName = loader->GetFun("GetCmdName"); +#else + InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); + EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); + GetOpName = loader->GetFun("GetCmdNameImpl"); +#endif } }; diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 55428a2aba..6b58dced69 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -556,7 +556,7 @@ PUBLIC_API const char* roctracer_op_string( break; } case ACTIVITY_DOMAIN_HCC_OPS: { - return roctracer::HccLoader::Instance().GetCmdName(kind); + return roctracer::HccLoader::Instance().GetOpName(kind); break; } case ACTIVITY_DOMAIN_HIP_API: { @@ -606,7 +606,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: return 1; case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; - case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER; + case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; #ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; diff --git a/projects/roctracer/test/CMakeLists.txt b/projects/roctracer/test/CMakeLists.txt index d794c44c39..3be85fc399 100644 --- a/projects/roctracer/test/CMakeLists.txt +++ b/projects/roctracer/test/CMakeLists.txt @@ -33,7 +33,7 @@ set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) add_custom_target( mytest COMMAND make -C "${TEST_DIR}/MatrixTranspose" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" - COMMAND make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" ) diff --git a/projects/roctracer/test/MatrixTranspose_test/Makefile b/projects/roctracer/test/MatrixTranspose_test/Makefile index e74d66b0d1..3caef12634 100644 --- a/projects/roctracer/test/MatrixTranspose_test/Makefile +++ b/projects/roctracer/test/MatrixTranspose_test/Makefile @@ -2,6 +2,7 @@ ROOT_PATH = ../.. LIB_PATH = $(ROOT_PATH)/build ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64 export LD_LIBRARY_PATH=$(LIB_PATH) +HIP_VDI ?= 0 ITERATIONS ?= 100 HIP_PATH?= $(wildcard /opt/rocm/hip) @@ -23,7 +24,7 @@ EXECUTABLE=./MatrixTranspose all: clean $(EXECUTABLE) -CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS) +CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) diff --git a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp index c2c50ebc41..c2a4ebad3c 100644 --- a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #include #ifndef ITERATIONS -# define ITERATIONS 100 +# define ITERATIONS 101 #endif #define WIDTH 1024 @@ -273,7 +273,7 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->device_id, record->queue_id ); - if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { fprintf(stdout, " external_id(%lu)", record->external_id From 017f7c2a2ddee596b1765ef238c2726a142e468b Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 22 Oct 2019 11:18:33 -0500 Subject: [PATCH 12/23] adding PREFIX_PATH env vsr to build script [ROCm/roctracer commit: 975656ef0986a52c70083c2126c81452d2540224] --- projects/roctracer/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/roctracer/build.sh b/projects/roctracer/build.sh index f0bdaf473b..d8d70c4928 100755 --- a/projects/roctracer/build.sh +++ b/projects/roctracer/build.sh @@ -17,6 +17,7 @@ if [ -z "$HCC_HOME" ] ; then export HCC_HOME="$ROCM_PATH/hcc"; fi if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi +if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) MAKE_OPTS="-j 8 -C $BUILD_DIR" @@ -27,7 +28,7 @@ pushd $BUILD_DIR cmake \ -DCMAKE_MODULE_PATH=$ROCTRACER_ROOT/cmake_modules \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_PREFIX_PATH="$PACKAGE_ROOT" \ + -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \ -DCPACK_GENERATOR="DEB;RPM" \ From cafb291589b1e4b1d783a84eeb1bbd1706a418e4 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 22 Oct 2019 15:38:06 -0500 Subject: [PATCH 13/23] hip-clang fixes [ROCm/roctracer commit: 3d021b1f0bb58cb8863ee1b2f30881fe0d82bcde] --- projects/roctracer/build.sh | 2 ++ projects/roctracer/test/tool/tracer_tool.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/projects/roctracer/build.sh b/projects/roctracer/build.sh index d8d70c4928..b94e58da94 100755 --- a/projects/roctracer/build.sh +++ b/projects/roctracer/build.sh @@ -18,6 +18,7 @@ if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi +if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) MAKE_OPTS="-j 8 -C $BUILD_DIR" @@ -32,6 +33,7 @@ cmake \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \ -DCPACK_GENERATOR="DEB;RPM" \ + $HIP_VDI_OPT \ $ROCTRACER_ROOT make make mytest diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index f21b0c7f7e..59b33fba2a 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -321,8 +321,10 @@ void hip_api_callback( entry->ptr = *(data->args.hipMalloc.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: +#if !HIP_VDI case HIP_API_ID_hipExtModuleLaunchKernel: case HIP_API_ID_hipHccModuleLaunchKernel: +#endif const hipFunction_t f = data->args.hipModuleLaunchKernel.f; if (f != NULL) { entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f)); @@ -389,8 +391,10 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: +#if !HIP_VDI case HIP_API_ID_hipExtModuleLaunchKernel: case HIP_API_ID_hipHccModuleLaunchKernel: +#endif fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(), cxx_demangle(entry->name), From eb908692c25e3f3e477b321f5b0943e1ec5e4730 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 22 Oct 2019 17:44:37 -0500 Subject: [PATCH 14/23] hip-vdi library name [ROCm/roctracer commit: 807745c5731aef9c5ee555cf6d08ecaaed802972] --- projects/roctracer/src/core/roctracer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 6b58dced69..b062aca61f 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -1056,8 +1056,11 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( } case ACTIVITY_DOMAIN_HCC_OPS: case ACTIVITY_DOMAIN_HIP_API: { - const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB"); - if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name); +#ifdef HIP_VDI + const char* hip_lib_name = "libamdhip64.so"; + roctracer::HccLoader::SetLibName(hip_lib_name); + roctracer::HipLoader::SetLibName(hip_lib_name); +#endif mark_api_callback_ptr = reinterpret_cast(properties); break; } From 30fbd01542c62db396e3b5962773ed22d850ea32 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 23 Oct 2019 17:20:37 -0500 Subject: [PATCH 15/23] hip trac elayer generator removing - it is located in hip [ROCm/roctracer commit: 064a7021a01a0c09ffb22f956bc6da474830642b] --- projects/roctracer/script/hipap.py | 454 ----------------------------- 1 file changed, 454 deletions(-) delete mode 100755 projects/roctracer/script/hipap.py diff --git a/projects/roctracer/script/hipap.py b/projects/roctracer/script/hipap.py deleted file mode 100755 index 829a2213c4..0000000000 --- a/projects/roctracer/script/hipap.py +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/python -import os, sys, re - -verbose = 0 -PROF_HEADER = "hip_prof_str.h" -OUTPUT = PROF_HEADER -REC_MAX_LEN = 1024 - -# Fatal error termination -inp_file = 'none' -line_num = -1 -def fatal(msg): - if line_num != -1: - print >>sys.stderr, "Error: " + msg + ", file '" + inp_file + "', line (" + str(line_num) + ")" - else: - print >>sys.stderr, "Error: " + msg - sys.exit(1) - -# Verbose message -def message(msg): - if verbose: print >>sys.stdout, msg - -############################################################# -# Normalizing API arguments -def filtr_api_args(args_str): - args_str = re.sub(r'^\s*', r'', args_str); - args_str = re.sub(r'\s*$', r'', args_str); - args_str = re.sub(r'\s*,\s*', r',', args_str); - args_str = re.sub(r'\s+', r' ', args_str); - args_str = re.sub(r'void \*', r'void* ', args_str); - args_str = re.sub(r'(enum|struct) ', '', args_str); - return args_str - -# Normalizing types -def norm_api_types(type_str): - type_str = re.sub(r'uint32_t', r'unsigned int', type_str) - type_str = re.sub(r'^unsigned$', r'unsigned int', type_str) - return type_str - -# Creating a list of arguments [(type, name), ...] -def list_api_args(args_str): - args_str = filtr_api_args(args_str) - args_list = [] - if args_str != '': - for arg_pair in args_str.split(','): - if arg_pair == 'void': continue - arg_pair = re.sub(r'\s*=\s*\S+$','', arg_pair); - m = re.match("^(.*)\s(\S+)$", arg_pair); - if m: - arg_type = norm_api_types(m.group(1)) - arg_name = m.group(2) - args_list.append((arg_type, arg_name)) - else: - fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'") - return args_list; - -# Creating arguments string "type0, type1, ..." -def filtr_api_types(args_str): - args_list = list_api_args(args_str) - types_str = '' - for arg_tuple in args_list: - types_str += arg_tuple[0] + ', ' - return types_str - -# Creating options list [opt0, opt1, ...] -def filtr_api_opts(args_str): - args_list = list_api_args(args_str) - opts_list = [] - for arg_tuple in args_list: - opts_list.append(arg_tuple[1]) - return opts_list -############################################################# -# Parsing API header -# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); -def parse_api(inp_file_p, out): - global inp_file - global line_num - inp_file = inp_file_p - - beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); - api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)"); - end_pattern = re.compile("Texture"); - hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)') - nms_open_pattern = re.compile(r'namespace hip_impl {') - nms_close_pattern = re.compile(r'}') - - inp = open(inp_file, 'r') - - found = 0 - hidden = 0 - nms_level = 0; - record = "" - line_num = -1 - - for line in inp.readlines(): - record += re.sub(r'^\s+', r' ', line[:-1]) - line_num += 1 - - if len(record) > REC_MAX_LEN: - fatal("bad record \"" + record + "\"") - - if beg_pattern.match(record) and (hidden == 0) and (nms_level == 0): found = 1 - - if found != 0: - record = re.sub("\s__dparm\([^\)]*\)", '', record); - m = api_pattern.match(record) - if m: - found = 0 - if end_pattern.search(record): break - out[m.group(2)] = m.group(3) - else: continue - - hidden = 0 - if hidden_pattern.match(line): hidden = 1 - - if nms_open_pattern.match(line): nms_level += 1 - if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1 - if nms_level < 0: - fatal("nms level < 0") - - record = "" - - inp.close() - line_num = -1 -############################################################# -# Patching API implementation -# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) { -# HIP_INIT_CB(hipSetupArgument, arg, size, offset); -# inp_file - input implementation source file -# api_map - input public API map [] => -# out - output map [] => [opt0, opt1, ...] -def parse_content(inp_file_p, api_map, out): - global inp_file - global line_num - inp_file = inp_file_p - - # API definition begin pattern - beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); - # API definition complete pattern - api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{"); - # API init macro pattern - init_pattern = re.compile("^\s*HIP_INIT[_\w]*_API\(([^,]+)(,|\))"); - target_pattern = re.compile("^(\s*HIP_INIT[^\(]*)(_API\()(.*)\);\s*$"); - - # Open input file - inp = open(inp_file, 'r') - - # API name - api_name = "" - # Valid public API found flag - api_valid = 0 - - # Input file patched content - content = '' - # Sub content for found API defiition - sub_content = '' - # Current record, accumulating several API definition related lines - record = '' - # Current input file line number - line_num = -1 - # API beginning found flag - found = 0 - - # Reading input file - for line in inp.readlines(): - # Accumulating record - record += re.sub(r'^\s+', r' ', line[:-1]) - line_num += 1 - - if len(record) > REC_MAX_LEN: - fatal("bad record \"" + record + "\"") - break; - - # Looking for API begin - if beg_pattern.match(record): found = 1 - - # Matching complete API definition - if found == 1: - record = re.sub("\s__dparm\([^\)]*\)", '', record); - m = api_pattern.match(record) - # Checking if complete API matched - if m: - found = 2 - api_name = m.group(2); - # Checking if API name is in the API map - if api_name in api_map: - # Getting API arguments - api_args = m.group(3) - # Getting etalon arguments from the API map - eta_args = api_map[api_name] - if eta_args == '': - eta_args = api_args - api_map[api_name] = eta_args - # Normalizing API arguments - api_types = filtr_api_types(api_args) - # Normalizing etalon arguments - eta_types = filtr_api_types(eta_args) - if api_types == eta_types: - # API is already found - if api_name in out: - fatal("API redefined \"" + api_name + "\", record \"" + record + "\"") - # Set valid public API found flag - api_valid = 1 - # Set output API map with API arguments list - out[api_name] = filtr_api_opts(api_args) - else: - # Warning about mismatched API, possible non public overloaded version - api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types - message("\t" + api_name + ':\n' + api_diff + '\n') - - # API found action - if found == 2: - # Looking for INIT macro - m = init_pattern.match(line) - if m: - found = 0 - if api_valid == 1: - api_valid = 0 - message("\t" + api_name) - else: - # Registering dummy API for non public API if the name in INIT is not NONE - init_name = m.group(1) - # Ignore if it is initialized as NONE - if init_name != 'NONE': - # Check if init name matching API name - if init_name != api_name: - fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") - # If init name is not in public API map then it is private API - # else it was not identified and will be checked on finish - if not init_name in api_map: - if init_name in out: - fatal("API reinit \"" + api_name + "\", record \"" + record + "\"") - out[init_name] = [] - elif re.search('}', line): - found = 0 - # Expect INIT macro for valid public API - if api_valid == 1: - api_valid = 0 - if api_name in out: - del out[api_name] - del api_map[api_name] - out['.' + api_name] = 1 - else: - fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"") - - if found != 1: record = "" - content += line - - inp.close() - line_num = -1 - - if len(out) != 0: - return content - else: - return '' - -# src path walk -def parse_src(api_map, src_path, src_patt, out): - pattern = re.compile(src_patt) - src_path = re.sub(r'\s', '', src_path) - for src_dir in src_path.split(':'): - message("Parsing " + src_dir + " for '" + src_patt + "'") - for root, dirs, files in os.walk(src_dir): - for fnm in files: - if pattern.search(fnm): - file = root + '/' + fnm - message(file) - content = parse_content(file, api_map, out); - if content != '': - f = open(file, 'w') - f.write(content) - f.close() -############################################################# -# Generating profiling primitives header -# api_map - public API map [] => [(type, name), ...] -# opts_map - opts map [] => [opt0, opt1, ...] -def generate_prof_header(f, api_map, opts_map): - # Private API list - priv_lst = [] - - f.write('// automatically generated sources\n') - f.write('#ifndef _HIP_PROF_STR_H\n'); - f.write('#define _HIP_PROF_STR_H\n'); - f.write('#include \n'); - f.write('#include \n'); - - # Generating dummy macro for non-public API - f.write('\n// Dummy API primitives\n') - f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n') - for name in opts_map: - if not name in api_map: - opts_lst = opts_map[name] - if len(opts_lst) != 0: - fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst)) - f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n') - priv_lst.append(name) - - for name in priv_lst: - message("Private: " + name) - - # Generating the callbacks ID enumaration - f.write('\n// HIP API callbacks ID enumaration\n') - f.write('enum hip_api_id_t {\n') - cb_id = 0 - for name in api_map.keys(): - f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') - cb_id += 1 - f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n') - f.write(' HIP_API_ID_ANY = ' + str(cb_id + 1) + ',\n') - f.write('\n') - f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n') - for name in priv_lst: - f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n') - f.write('};\n') - - # Generating the callbacks ID enumaration - f.write('\n// Return HIP API string\n') - f.write('static const char* hip_api_name(const uint32_t& id) {\n') - f.write(' switch(id) {\n') - for name in api_map.keys(): - f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n') - f.write(' };\n') - f.write(' return "unknown";\n') - f.write('};\n') - - # Generating the callbacks data structure - f.write('\n// HIP API callbacks data structure\n') - f.write( - 'struct hip_api_data_t {\n' + - ' uint64_t correlation_id;\n' + - ' uint32_t phase;\n' + - ' union {\n' - ) - for name, args in api_map.items(): - if len(args) != 0: - f.write(' struct {\n') - for arg_tuple in args: - f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') - f.write(' } ' + name + ';\n') - f.write( - ' } args;\n' + - '};\n' - ) - - # Generating the callbacks args data filling macros - f.write('\n// HIP API callbacks args data filling macros\n') - for name, args in api_map.items(): - f.write('// ' + name + str(args) + '\n') - f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n') - if name in opts_map: - opts_list = opts_map[name] - if len(args) != len(opts_list): - fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list)) - # API args iterating: - # type is args[][0] - # name is args[][1] - for ind in range(0, len(args)): - arg_tuple = args[ind] - fld_name = arg_tuple[1] - arg_name = opts_list[ind] - f.write(' cb_data.args.' + name + '.' + fld_name + ' = ' + arg_name + '; \\\n') - f.write('};\n') - f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') - - # Generating the method for the API string, name and parameters - f.write('\n') - f.write('#if 0\n') - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') - - f.write('#endif // _HIP_PROF_STR_H\n'); - -############################################################# -# main -# Usage -if (len(sys.argv) > 1) and (sys.argv[1] == '-v'): - verbose = 1 - sys.argv.pop(1) - -if (len(sys.argv) < 3): - fatal ("Usage: " + sys.argv[0] + " [-v] \n" + - " -v - verbose messages\n" + - " example:\n" + - " $ hipap.py hip/include/hip/hcc_detail/hip_runtime_api.h hip/src") - -# API header file given as an argument -api_hfile = sys.argv[1] -if not os.path.isfile(api_hfile): - fatal("input file '" + api_hfile + "' not found") - -# Srcs directory given as an argument -src_pat = "\.cpp$" -src_dir = sys.argv[2] -if not os.path.isdir(src_dir): - fatal("src directory " + src_dir + "' not found") - -if len(sys.argv) > 3: OUTPUT = sys.argv[3] - -# API declaration map -api_map = { - 'hipHccModuleLaunchKernel': '' -} -# API options map -opts_map = {} - -# Parsing API header -parse_api(api_hfile, api_map) - -# Parsing sources -parse_src(api_map, src_dir, src_pat, opts_map) - -# Checking for non-conformant APIs -for name in opts_map.keys(): - m = re.match(r'\.(\S*)', name) - if m: - message("Init missing: " + m.group(1)) - del opts_map[name] - -# Converting api map to map of lists -# Checking for not found APIs -not_found = 0 -if len(opts_map) != 0: - for name in api_map.keys(): - args_str = api_map[name]; - api_map[name] = list_api_args(args_str) - if not name in opts_map: - fatal("not found: " + name) - not_found += 1 -if not_found != 0: - fatal(not_found + " API calls not found") - -# Generating output header file -with open(OUTPUT, 'w') as f: - generate_prof_header(f, api_map, opts_map) - -# Successfull exit -sys.exit(0) From 61dbb74c8a1698a715ce14d603bc84f4f70bd16a Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 00:53:26 -0500 Subject: [PATCH 16/23] hip-vdi changes [ROCm/roctracer commit: eb4bf2dac6ff736f71dbf115b63993b9f06f689b] --- projects/roctracer/src/core/loader.h | 5 ++--- projects/roctracer/src/core/roctracer.cpp | 2 ++ .../test/MatrixTranspose_test/MatrixTranspose.cpp | 3 +++ projects/roctracer/test/run.sh | 10 ++++++++-- projects/roctracer/test/tool/tracer_tool.cpp | 4 ++-- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index 1c4005dd3b..f2ba57ed9c 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -172,11 +172,10 @@ typedef BaseLoader RocTxLoader; #define LOADER_INSTANTIATE() \ template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ - template const bool roctracer::BaseLoader::strong_ld_check_ = false; + template const bool roctracer::BaseLoader::strong_ld_check_ = true; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ - template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ - template<> const bool roctracer::RocTxLoader::strong_ld_check_ = false; + template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; #endif // SRC_CORE_LOADER_H_ diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index b062aca61f..155892c6dd 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -324,10 +324,12 @@ void* HIP_SyncActivityCallback( if ((data == NULL) && (pool != NULL)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: pool is not NULL"); // Allocating a record if NULL passed if (record == NULL) { + if (data != NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: record is NULL"); record_pair_stack.push({}); auto& top = record_pair_stack.top(); record = &(top.record); data_ptr = &(top.data.hip); + data = data_ptr; } // Filing record info diff --git a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp index c2a4ebad3c..40082f9f38 100644 --- a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -291,6 +291,8 @@ void activity_callback(const char* begin, const char* end, void* arg) { // Init tracing routine void init_tracing() { std::cout << "# INIT #############################" << std::endl << std::flush; + // roctracer properties + roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL); // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 0x1000; @@ -320,6 +322,7 @@ void stop_tracing() { std::cout << "# STOP #############################" << std::endl << std::flush; } #else +void init_tracing() {} void start_tracing() {} void stop_tracing() {} #endif diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index 3b383da201..d3e317cc70 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -25,7 +25,7 @@ # enable tools load failure reporting export HSA_TOOLS_REPORT_LOAD_FAILURE=1 # paths to ROC profiler and oher libraries -export LD_LIBRARY_PATH=$PWD +export LD_LIBRARY_PATH=$PWD:/opt/rocm/hip/lib # test filter input test_filter=-1 @@ -33,6 +33,12 @@ if [ -n "$1" ] ; then test_filter=$1 fi +# debugger +debugger="" +if [ -n "$2" ] ; then + debugger=$2 +fi + # test check routin test_status=0 test_runnum=0 @@ -46,7 +52,7 @@ eval_test() { if [ $test_filter = -1 -o $test_filter = $test_number ] ; then echo "$label: \"$cmdline\"" test_runnum=$((test_runnum + 1)) - eval "$cmdline" + eval "$debugger $cmdline" if [ $? != 0 ] ; then echo "$label: FAILED" test_status=$(($test_status + 1)) diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 59b33fba2a..b5c5c2bf6d 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -680,6 +680,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); fprintf(stdout, " HIP-trace()\n"); fflush(stdout); + // roctracer properties + roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 0x80000; @@ -688,8 +690,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); - - roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); } const char* ctrl_str = getenv("ROCP_CTRL_RATE"); From d6aaf049cf3be0c66f038f6a24888cbad776cacb Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 02:10:09 -0500 Subject: [PATCH 17/23] hsa and hip tracing interf2erence [ROCm/roctracer commit: 640e24ea9a51e2232444c453e539b293e3945306] --- projects/roctracer/test/tool/tracer_tool.cpp | 83 +++++++++++++------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index b5c5c2bf6d..99b87c1135 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -71,7 +71,8 @@ thread_local timestamp_t kfd_begin_timestamp = 0; bool trace_roctx = false; bool trace_hsa_api = false; bool trace_hsa_activity = false; -bool trace_hip = false; +bool trace_hip_api = false; +bool trace_hip_activity = false; bool trace_kfd = false; LOADER_INSTANTIATE(); @@ -531,28 +532,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); } timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); - // API traces switches - const char* trace_domain = getenv("ROCTRACER_DOMAIN"); - if (trace_domain != NULL) { - if (std::string(trace_domain).find("roctx") != std::string::npos) { - trace_roctx = true; - } - if (std::string(trace_domain).find("hsa") != std::string::npos) { - trace_hsa_api = true; - trace_hsa_activity = true; - } - if (std::string(trace_domain).find("hip") != std::string::npos) { - trace_hip = true; - } - if (std::string(trace_domain).find("sys") != std::string::npos) { - trace_hsa_api = true; - trace_hip = true; - } - if (std::string(trace_domain).find("kfd") != std::string::npos) { - trace_kfd = true; - } - } - // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); if (output_prefix != NULL) { @@ -565,6 +544,43 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } + // API traces switches + const char* trace_domain = getenv("ROCTRACER_DOMAIN"); + if (trace_domain != NULL) { + // ROCTX domain + if (std::string(trace_domain).find("roctx") != std::string::npos) { + trace_roctx = true; + } + + // HSA/HIP domains enabling + if (std::string(trace_domain).find("hsa-api") != std::string::npos) { + trace_hsa_api = true; + } + if (std::string(trace_domain).find("hsa-act") != std::string::npos) { + trace_hsa_activity = true; + } + if ((trace_hsa_activity == false) && (trace_hsa_api == false)) { + if (std::string(trace_domain).find("hsa") != std::string::npos) { + trace_hsa_api = true; + trace_hsa_activity = true; + } + } + if (std::string(trace_domain).find("hip") != std::string::npos) { + trace_hip_api = true; + trace_hip_activity = true; + } + if (std::string(trace_domain).find("sys") != std::string::npos) { + trace_hsa_api = true; + trace_hip_api = true; + trace_hip_activity = true; + } + + // KFD domain enabling + if (std::string(trace_domain).find("kfd") != std::string::npos) { + trace_kfd = true; + } + } + // API trace vector std::vector hsa_api_vec; std::vector kfd_api_vec; @@ -608,7 +624,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } if (name == "HIP") { found = true; - trace_hip = true; + trace_hip_api = true; + trace_hip_activity = true; } if (name == "KFD") { found = true; @@ -621,6 +638,9 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } printf("\n"); + // Disable HIP activity if HSA activity was set + if (trace_hsa_activity == true) trace_hip_activity = false; + // Enable rpcTX callbacks if (trace_roctx) { roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt"); @@ -658,6 +678,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } + // Enable HSA GPU activity if (trace_hsa_activity) { hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); @@ -675,7 +696,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } // Enable HIP API callbacks/activity - if (trace_hip) { + if (trace_hip_api || trace_hip_activity) { hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); @@ -687,9 +708,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, properties.buffer_size = 0x80000; properties.buffer_callback_fun = hcc_activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); + if (trace_hip_api) { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + } + if (trace_hip_activity) { + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + } } const char* ctrl_str = getenv("ROCP_CTRL_RATE"); @@ -775,7 +800,7 @@ void tool_unload(bool destruct) { close_output_file(hsa_async_copy_file_handle); } - if (trace_hip) { + if (trace_hip_api || trace_hip_activity) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); From 130be9b91e661ff26758ede44a26281e21c08c48 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 10:11:16 -0500 Subject: [PATCH 18/23] reverting ld_library_path [ROCm/roctracer commit: daac4fad76952d79e115df9e843870545ca786f1] --- projects/roctracer/test/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index d3e317cc70..af56efe5f3 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -25,7 +25,7 @@ # enable tools load failure reporting export HSA_TOOLS_REPORT_LOAD_FAILURE=1 # paths to ROC profiler and oher libraries -export LD_LIBRARY_PATH=$PWD:/opt/rocm/hip/lib +export LD_LIBRARY_PATH=$PWD # test filter input test_filter=-1 From f4a0cb7a698350659eaf1497686d2a77d8b5519e Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 17:39:05 -0500 Subject: [PATCH 19/23] enabling rocprofiler kernels trace [ROCm/roctracer commit: 2303c1e398c2ce0d8281974eb07e31a124b071b8] --- projects/roctracer/src/core/roctracer.cpp | 2 ++ projects/roctracer/test/tool/tracer_tool.cpp | 10 +--------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 155892c6dd..e93179de56 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -1026,11 +1026,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; roctracer::hsa_support::output_prefix = ops_properties->output_prefix; +#if 0 // HSA dispatches intercepting rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); rocprofiler::ProxyQueue::HsaIntercept(table); rocprofiler::InterceptQueue::HsaIntercept(table); +#endif // HSA async-copy tracing hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 99b87c1135..71da132baa 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -553,18 +553,10 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } // HSA/HIP domains enabling - if (std::string(trace_domain).find("hsa-api") != std::string::npos) { + if (std::string(trace_domain).find("hsa") != std::string::npos) { trace_hsa_api = true; - } - if (std::string(trace_domain).find("hsa-act") != std::string::npos) { trace_hsa_activity = true; } - if ((trace_hsa_activity == false) && (trace_hsa_api == false)) { - if (std::string(trace_domain).find("hsa") != std::string::npos) { - trace_hsa_api = true; - trace_hsa_activity = true; - } - } if (std::string(trace_domain).find("hip") != std::string::npos) { trace_hip_api = true; trace_hip_activity = true; From 07f81abe3b5cfdeb63cd6e19b543089245ed3b74 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 19:21:24 -0500 Subject: [PATCH 20/23] disable kfd wrapper installation [ROCm/roctracer commit: 8941ca3acdb15f296d75bf88305237185edd7564] --- projects/roctracer/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/roctracer/CMakeLists.txt b/projects/roctracer/CMakeLists.txt index f14ba0b9e4..185b62dd65 100644 --- a/projects/roctracer/CMakeLists.txt +++ b/projects/roctracer/CMakeLists.txt @@ -110,7 +110,7 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION in install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) ## kfdwrapper -install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) +#install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) From f974164bb4dc3950da4daa9d5b9690b01d47e3d9 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 19:39:04 -0500 Subject: [PATCH 21/23] fix loader to have strict ldopen [ROCm/roctracer commit: d2db29cd7d883c9d1a77329423a9c9a279757c3c] --- projects/roctracer/src/core/loader.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index f2ba57ed9c..fa2b0e62ad 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -29,12 +29,12 @@ class BaseLoader : public T { return f; } - static inline loader_t& Instance(const bool& preload = false) { + static inline loader_t& Instance() { loader_t* obj = instance_.load(std::memory_order_acquire); if (obj == NULL) { std::lock_guard lck(mutex_); if (instance_.load(std::memory_order_relaxed) == NULL) { - obj = new loader_t(preload); + obj = new loader_t(); instance_.store(obj, std::memory_order_release); } } @@ -45,11 +45,11 @@ class BaseLoader : public T { static void SetLibName(const char *name) { lib_name_ = name; } private: - BaseLoader(bool preload) { - const int flags = (preload) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; + BaseLoader() { + const int flags = RTLD_LAZY; handle_ = dlopen(lib_name_, flags); - if ((handle_ == NULL) && (strong_ld_check_)) { - fprintf(stderr, "roctracer: Loading '%s' failed, preload(%d), %s\n", lib_name_, (int)preload, dlerror()); + if (handle_ == NULL) { + fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); abort(); } dlerror(); @@ -64,7 +64,6 @@ class BaseLoader : public T { static mutex_t mutex_; static const char* lib_name_; static std::atomic instance_; - static const bool strong_ld_check_; void* handle_; }; @@ -172,7 +171,6 @@ typedef BaseLoader RocTxLoader; #define LOADER_INSTANTIATE() \ template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ - template const bool roctracer::BaseLoader::strong_ld_check_ = true; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ From d4ddee69d4acd8ef3bce98cb088a1bbd855e724e Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 25 Oct 2019 20:10:44 -0500 Subject: [PATCH 22/23] update [ROCm/roctracer commit: 086a8f4aa6770d0a065907edce137612d280ada7] --- projects/roctracer/CMakeLists.txt | 6 ++++-- projects/roctracer/cmake_modules/env.cmake | 2 +- projects/roctracer/src/core/roctracer.cpp | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/projects/roctracer/CMakeLists.txt b/projects/roctracer/CMakeLists.txt index 185b62dd65..e90a4f7924 100644 --- a/projects/roctracer/CMakeLists.txt +++ b/projects/roctracer/CMakeLists.txt @@ -109,8 +109,10 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) -## kfdwrapper -#install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) +## KFD wrapper +if ( DEFINED KFD_WRAPPER ) + install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) +endif () ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) diff --git a/projects/roctracer/cmake_modules/env.cmake b/projects/roctracer/cmake_modules/env.cmake index 727cd0839e..9ad3fbf23b 100644 --- a/projects/roctracer/cmake_modules/env.cmake +++ b/projects/roctracer/cmake_modules/env.cmake @@ -43,7 +43,7 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) add_definitions ( -DDEBUG_TRACE=1 ) endif() -## Enable KFD wrapper +## Enable HIP_VDI mode if ( DEFINED HIP_VDI ) add_definitions ( -DHIP_VDI=${HIP_VDI} ) else() diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index e93179de56..a3b701c3ee 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -92,7 +92,6 @@ THE SOFTWARE. #endif static inline uint32_t GetPid() { return syscall(__NR_getpid); } -static inline uint32_t GetTid() { return syscall(__NR_gettid); } /////////////////////////////////////////////////////////////////////////////////////////////////// // Mark callback From f6f685f8d4326cd5001a400e73316e75515fd001 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 6 Nov 2019 09:11:11 -0600 Subject: [PATCH 23/23] hip-vdi sync activity callback fix [ROCm/roctracer commit: 6a442864a326b685130af461472da27a1b5a41e2] --- projects/roctracer/src/core/roctracer.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index a3b701c3ee..1ae25fb885 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -313,22 +313,23 @@ void* HIP_SyncActivityCallback( MemoryPool* pool = reinterpret_cast(arg); int phase = ACTIVITY_API_PHASE_ENTER; - if (data != NULL) { + if (record != NULL) { + if (data == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback: data is NULL"); phase = data->phase; } else if (pool != NULL) { phase = ACTIVITY_API_PHASE_EXIT; } if (phase == ACTIVITY_API_PHASE_ENTER) { - if ((data == NULL) && (pool != NULL)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: pool is not NULL"); // Allocating a record if NULL passed if (record == NULL) { if (data != NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: record is NULL"); record_pair_stack.push({}); auto& top = record_pair_stack.top(); record = &(top.record); - data_ptr = &(top.data.hip); - data = data_ptr; + data = &(top.data.hip); + data_ptr = const_cast(data); + data_ptr->phase = phase; } // Filing record info @@ -340,7 +341,7 @@ void* HIP_SyncActivityCallback( uint64_t correlation_id = data->correlation_id; if (correlation_id == 0) { correlation_id = GlobalCounter::Increment(); - const_cast(data)->correlation_id = correlation_id; + data_ptr->correlation_id = correlation_id; } record->correlation_id = correlation_id; @@ -352,11 +353,10 @@ void* HIP_SyncActivityCallback( if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: pool is NULL"); // Getting record of stacked - if (!record_pair_stack.empty()) { + if (record == NULL) { + if (record_pair_stack.empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty"); auto& top = record_pair_stack.top(); record = &(top.record); - data = &(top.data.hip); - record_pair_stack.pop(); } // Filing record info @@ -376,6 +376,9 @@ void* HIP_SyncActivityCallback( // Writing record to the buffer pool->Write(*record); + // popping the record entry + if (!record_pair_stack.empty()) record_pair_stack.pop(); + // Clearing correlatin ID correlation_id_tls = 0;