From c05bded17cc9f5d3e6e9dcc02fd1550f0f9fce09 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 16 Oct 2018 19:33:48 -0500 Subject: [PATCH] dispatch data th-id, queue-id, kernel-object Change-Id: Ic5593603c6587cad17be33618d011a9aefc648ab --- bin/run_tool.sh | 36 +++++ inc/rocprofiler.h | 5 +- src/core/intercept_queue.cpp | 1 + src/core/intercept_queue.h | 22 ++- test/CMakeLists.txt | 16 ++- test/app/intercept_test.cpp | 231 ++++++++++++++++++++++++++++++ test/app/intercept_test_stand.cpp | 189 ++++++++++++++++++++++++ test/ctrl/run_kernel.h | 3 + test/ctrl/test_hsa.cpp | 22 +-- test/run.sh | 18 ++- test/tool/tool.cpp | 3 +- 11 files changed, 523 insertions(+), 23 deletions(-) create mode 100755 bin/run_tool.sh create mode 100644 test/app/intercept_test.cpp create mode 100644 test/app/intercept_test_stand.cpp diff --git a/bin/run_tool.sh b/bin/run_tool.sh new file mode 100755 index 0000000000..5ee438c09b --- /dev/null +++ b/bin/run_tool.sh @@ -0,0 +1,36 @@ +#!/bin/sh +BIN_DIR=`dirname $0` +BIN_DIR=`cd $BIN_DIR; pwd` +PKG_DIR=`echo $BIN_DIR | sed "s/\/bin\/*$//"` +BIN_DIR=$PKG_DIR/bin + +# PATH to custom HSA libs +HSA_PATH=$PKG_DIR/lib/hsa + +if [ -z "$1" ] ; then + echo "Usage: $0 " +else +# test application command line (all script arguments) +test_app=$* + +# paths to ROC profiler and other libraries +export LD_LIBRARY_PATH=$PKG_DIR/lib:$PKG_DIR/tool:$HSA_PATH +export PATH=.:$PATH + +# ROC profiler library loaded by HSA runtime +export HSA_TOOLS_LIB=librocprofiler64.so.1 +# tool library loaded by ROC profiler (default used only if the caller did not set one) +if [ -z "$ROCP_TOOL_LIB" ] ; then + export ROCP_TOOL_LIB=libintercept_test.so +fi +# enable error messages +export HSA_TOOLS_REPORT_LOAD_FAILURE=1 +export HSA_VEN_AMD_AQLPROFILE_LOG=1 +export ROCPROFILER_LOG=1 +# make sure ROCP_PROXY_QUEUE is not set +unset ROCP_PROXY_QUEUE +# ROC profiler metrics config file +export ROCP_METRICS=$BIN_DIR/lib/metrics.xml + +LD_PRELOAD=$ROCP_TOOL_LIB $test_app +fi diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index c6c58baa77..cc21b76f0c 100644 ---
a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -46,7 +46,7 @@ THE SOFTWARE. #include #include -#define ROCPROFILER_VERSION_MAJOR 3 +#define ROCPROFILER_VERSION_MAJOR 4 #define ROCPROFILER_VERSION_MINOR 0 #ifdef __cplusplus @@ -217,8 +217,11 @@ typedef struct { uint32_t agent_index; // GPU index const hsa_queue_t* queue; // HSA queue uint64_t queue_index; // Index in the queue + uint32_t queue_id; // Queue id const hsa_kernel_dispatch_packet_t* packet; // HSA dispatch packet const char* kernel_name; // Kernel name + uint64_t kernel_object; // Kernel object pointer + int64_t thread_id; // Thread id const rocprofiler_dispatch_record_t* record; // Dispatch record } rocprofiler_callback_data_t; diff --git a/src/core/intercept_queue.cpp b/src/core/intercept_queue.cpp index 7703c662bd..2b901767dc 100644 --- a/src/core/intercept_queue.cpp +++ b/src/core/intercept_queue.cpp @@ -37,5 +37,6 @@ const char* InterceptQueue::kernel_none_ = ""; Tracker* InterceptQueue::tracker_ = NULL; bool InterceptQueue::tracker_on_ = false; bool InterceptQueue::in_constr_call_ = false; +InterceptQueue::queue_id_t InterceptQueue::current_queue_id = 0; } // namespace rocprofiler diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h index ec81a81865..0420ccb81f 100644 --- a/src/core/intercept_queue.h +++ b/src/core/intercept_queue.h @@ -26,6 +26,7 @@ THE SOFTWARE. 
#include #include #include +#include #include #include @@ -49,6 +50,7 @@ class InterceptQueue { typedef std::map obj_map_t; typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data); typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg); + typedef uint32_t queue_id_t; static void HsaIntercept(HsaApiTable* table); @@ -79,6 +81,8 @@ class InterceptQueue { (*obj_map_)[(uint64_t)(*queue)] = obj; status = proxy->SetInterceptCB(OnSubmitCB, obj); obj->queue_event_callback_ = callback; + obj->queue_id = current_queue_id; + ++current_queue_id; in_constr_call_ = false; return status; @@ -139,13 +143,17 @@ class InterceptQueue { } // Prepareing dispatch callback data - const char* kernel_name = GetKernelName(dispatch_packet); + uint64_t kernel_symbol = GetKernelSymbol(dispatch_packet); + const char* kernel_name = GetKernelName(kernel_symbol); rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, obj->agent_info_->dev_index, obj->queue_, user_que_idx, + obj->queue_id, dispatch_packet, kernel_name, + kernel_symbol, + syscall(__NR_gettid), (tracker_entry) ? 
tracker_entry->record : NULL}; // Calling dispatch callback @@ -222,7 +230,7 @@ class InterceptQueue { return static_cast((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask); } - static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) { + static uint64_t GetKernelSymbol(const hsa_kernel_dispatch_packet_t* dispatch_packet) { const amd_kernel_code_t* kernel_code = NULL; hsa_status_t status = util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address( @@ -231,8 +239,12 @@ class InterceptQueue { if (HSA_STATUS_SUCCESS != status) { kernel_code = reinterpret_cast(dispatch_packet->kernel_object); } - amd_runtime_loader_debug_info_t* dbg_info = reinterpret_cast( - kernel_code->runtime_loader_kernel_symbol); + return kernel_code->runtime_loader_kernel_symbol; + } + + static const char* GetKernelName(const uint64_t kernel_symbol) { + amd_runtime_loader_debug_info_t* dbg_info = + reinterpret_cast(kernel_symbol); const char* kernel_name = (dbg_info != NULL) ? 
dbg_info->kernel_name : NULL; // Kernel name is mangled name @@ -298,11 +310,13 @@ class InterceptQueue { static Tracker* tracker_; static bool tracker_on_; static bool in_constr_call_; + static queue_id_t current_queue_id; hsa_queue_t* const queue_; ProxyQueue* const proxy_; const util::AgentInfo* agent_info_; queue_event_callback_t queue_event_callback_; + queue_id_t queue_id; }; } // namespace rocprofiler diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2b12b28a66..c7d86ccfb1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,11 +37,18 @@ file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" ) ## Standalone test sources set ( STEXE_NAME "standalone_test" ) -set ( STST_SRC +set ( STTST_SRC ${TEST_DIR}/app/standalone_test.cpp ${TEST_DIR}/ctrl/test_hsa.cpp ) +## Intercept test sources +set ( INEXE_NAME "intercept_test" ) +set ( INTST_SRC + ${TEST_DIR}/app/intercept_test.cpp + ${TEST_DIR}/ctrl/test_hsa.cpp +) + ## Test control sources set ( CTRL_SRC ${TEST_DIR}/app/test.cpp @@ -58,10 +65,15 @@ set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp ) execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) ## Building standalone test executable -add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} ) +add_executable ( ${STEXE_NAME} ${STTST_SRC} ${UTIL_SRC} ${KERN_SRC} ) target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt ) +## Building intercept test executable +add_library ( ${INEXE_NAME} SHARED ${INTST_SRC} ${UTIL_SRC} ${KERN_SRC} ) +target_include_directories ( ${INEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt ) + ## Building ctrl test executable add_executable ( ${EXE_NAME} ${CTRL_SRC} 
${UTIL_SRC} ${KERN_SRC} ) target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) diff --git a/test/app/intercept_test.cpp b/test/app/intercept_test.cpp new file mode 100644 index 0000000000..87e00d6468 --- /dev/null +++ b/test/app/intercept_test.cpp @@ -0,0 +1,231 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*******************************************************************************/ + +#include +#include +#include +#include + +#include +#include +#include + +#include "ctrl/run_kernel.h" +#include "ctrl/test_aql.h" +#include "ctrl/test_hsa.h" +#include "inc/rocprofiler.h" +#include "dummy_kernel/dummy_kernel.h" +#include "simple_convolution/simple_convolution.h" +#include "util/test_assert.h" + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +// Dispatch callbacks and context handlers synchronization +pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; +// Tool is unloaded +volatile bool is_loaded = false; + +// Error handler +void fatal(const std::string msg) { + fflush(stdout); + fprintf(stderr, "%s\n\n", msg.c_str()); + fflush(stderr); + abort(); +} + +// Check returned HSA API status +void check_status(hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char* error_string = NULL; + rocprofiler_error_string(&error_string); + fprintf(stderr, "ERROR: %s\n", error_string); + abort(); + } +} + +// Context stored entry type +struct context_entry_t { + bool valid; + hsa_agent_t agent; + rocprofiler_group_t group; + rocprofiler_callback_data_t data; +}; + +// Dump stored context entry +void dump_context_entry(context_entry_t* entry) { + volatile std::atomic* valid = reinterpret_cast*>(&entry->valid); + while (valid->load() == false) sched_yield(); + + const std::string kernel_name = entry->data.kernel_name; + const rocprofiler_dispatch_record_t* record = entry->data.record; + + fflush(stdout); + fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\") tid(%ld) queue-id(%u) gpu-id(%u) ", + entry->data.kernel_object, + kernel_name.c_str(), + entry->data.thread_id, + entry->data.queue_id, + HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index); + if (record) fprintf(stdout, "time(%lu,%lu,%lu,%lu)", + record->dispatch, + 
record->begin, + record->end, + record->complete); + fprintf(stdout, "\n"); + fflush(stdout); + + rocprofiler_group_t& group = entry->group; + if (group.context == NULL) { + fprintf(stderr, "tool error: context is NULL\n"); + abort(); + } + + rocprofiler_close(group.context); +} + +// Profiling completion handler +// Dump and delete the context entry, including the kernel name copy made by strdup() in dispatch_callback +// Return true if the context was dumped successfully +bool context_handler(rocprofiler_group_t group, void* arg) { + context_entry_t* entry = reinterpret_cast(arg); + + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + abort(); + } + + dump_context_entry(entry); + free(const_cast<char*>(entry->data.kernel_name)); delete entry; + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + abort(); + } + + return false; +} + +// Kernel dispatch callback +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/, + rocprofiler_group_t* group) { + // HSA status + hsa_status_t status = HSA_STATUS_ERROR; + + // Profiling context + rocprofiler_t* context = NULL; + + // Context entry + context_entry_t* entry = new context_entry_t(); + + // context properties + rocprofiler_properties_t properties{}; + properties.handler = context_handler; + properties.handler_arg = (void*)entry; + + // Open profiling context + status = rocprofiler_open(callback_data->agent, NULL, 0, + &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + check_status(status); + + // Get group[0] + status = rocprofiler_get_group(context, 0, group); + check_status(status); + + // Fill profiling context entry + entry->agent = callback_data->agent; + entry->group = *group; + entry->data = *callback_data; + entry->data.kernel_name = strdup(callback_data->kernel_name); + reinterpret_cast*>(&entry->valid)->store(true); + + return HSA_STATUS_SUCCESS; +} + +void initialize() { + // Getting GPU device info + const AgentInfo* agent_info = NULL; + if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) { +
fprintf(stderr, "GetGpuAgentInfo failed\n"); + abort(); + } + + // Adding dispatch observer + rocprofiler_queue_callbacks_t callbacks_ptrs{}; + callbacks_ptrs.dispatch = dispatch_callback; + rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL); +} + +void cleanup() { + // Unregister dispatch callback + rocprofiler_remove_queue_callbacks(); + + // Dump stored profiling output data + fflush(stdout); +} + +// Tool constructor +extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) +{ + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + abort(); + } + const bool already_loaded = is_loaded; + is_loaded = true; + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + abort(); + } + if (already_loaded) return;  // decided under the lock, returned only after the unlock above + // Enable timestamping + settings->timestamp_on = true; + + // Initialize profiling + initialize(); +} + +// Tool destructor +extern "C" PUBLIC_API void OnUnloadTool() { + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + abort(); + } + const bool was_loaded = is_loaded; + is_loaded = false; + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + abort(); + } + if (!was_loaded) return;  // decided under the lock, returned only after the unlock above + // Final resources cleanup + cleanup(); +} + +extern "C" CONSTRUCTOR_API void constructor() { + printf("INTT constructor\n"); fflush(stdout); +} + +extern "C" DESTRUCTOR_API void destructor() { + if (is_loaded == true) OnUnloadTool(); +} diff --git a/test/app/intercept_test_stand.cpp b/test/app/intercept_test_stand.cpp new file mode 100644 index 0000000000..de3dbdaf72 --- /dev/null +++ b/test/app/intercept_test_stand.cpp @@ -0,0 +1,189 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*******************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "ctrl/run_kernel.h" +#include "ctrl/test_aql.h" +#include "ctrl/test_hsa.h" +#include "inc/rocprofiler.h" +#include "dummy_kernel/dummy_kernel.h" +#include "simple_convolution/simple_convolution.h" +#include "util/test_assert.h" + +// Dispatch callbacks and context handlers synchronization +pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; + +// Error handler +void fatal(const std::string msg) { + fflush(stdout); + fprintf(stderr, "%s\n\n", msg.c_str()); + fflush(stderr); + abort(); +} + +// Check returned HSA API status +void check_status(hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char* error_string = NULL; + rocprofiler_error_string(&error_string); + fprintf(stderr, "ERROR: %s\n", error_string); + abort(); + } +} + +// Context stored entry type +struct context_entry_t { + bool valid; + hsa_agent_t agent; + rocprofiler_group_t group; + rocprofiler_callback_data_t data; +}; + +// Dump stored context entry +void dump_context_entry(context_entry_t* entry) { + volatile std::atomic* valid = reinterpret_cast*>(&entry->valid); + while (valid->load() == false) sched_yield(); + + const std::string kernel_name = entry->data.kernel_name; + const rocprofiler_dispatch_record_t* record = entry->data.record; + + fflush(stdout); + fprintf(stdout, "kernel symbol(0x%lx) name(\"%s\")", entry->data.kernel_object, kernel_name.c_str()); + if (record) fprintf(stdout, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)", + HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + record->dispatch, + record->begin, + record->end, + record->complete); + fprintf(stdout, "\n"); + fflush(stdout); + + rocprofiler_group_t& group = entry->group; + if (group.context == NULL) { + fprintf(stderr, "tool error: context is NULL\n"); + abort(); + } + + rocprofiler_close(group.context); +} + +// Profiling completion 
handler +// Dump and delete the context entry, including the kernel name copy made by strdup() in dispatch_callback +// Return true if the context was dumped successfully +bool context_handler(rocprofiler_group_t group, void* arg) { + context_entry_t* entry = reinterpret_cast(arg); + + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + abort(); + } + + dump_context_entry(entry); + free(const_cast<char*>(entry->data.kernel_name)); delete entry; + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + abort(); + } + + return false; +} + +// Kernel dispatch callback +hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* /*user_data*/, + rocprofiler_group_t* group) { + // HSA status + hsa_status_t status = HSA_STATUS_ERROR; + + // Profiling context + rocprofiler_t* context = NULL; + + // Context entry + context_entry_t* entry = new context_entry_t(); + + // context properties + rocprofiler_properties_t properties{}; + properties.handler = context_handler; + properties.handler_arg = (void*)entry; + + // Open profiling context + status = rocprofiler_open(callback_data->agent, NULL, 0, + &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + check_status(status); + + // Get group[0] + status = rocprofiler_get_group(context, 0, group); + check_status(status); + + // Fill profiling context entry + entry->agent = callback_data->agent; + entry->group = *group; + entry->data = *callback_data; + entry->data.kernel_name = strdup(callback_data->kernel_name); + reinterpret_cast*>(&entry->valid)->store(true); + + return HSA_STATUS_SUCCESS; +} + +int main() { + bool ret_val = false; + const char* kiter_s = getenv("ROCP_KITER"); + const char* diter_s = getenv("ROCP_DITER"); + const unsigned kiter = (kiter_s != NULL) ? atol(kiter_s) : 1; + const unsigned diter = (diter_s != NULL) ?
atol(diter_s) : 1; + + // Instantiate HSA resources + HsaRsrcFactory::Create(); + + // Getting GPU device info + const AgentInfo* agent_info = NULL; + if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort(); + + // Creating the queue + hsa_queue_t* queue = NULL; + if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue) == false) abort(); + + // Adding dispatch observer + rocprofiler_queue_callbacks_t callbacks_ptrs{}; + callbacks_ptrs.dispatch = dispatch_callback; + rocprofiler_set_queue_callbacks(callbacks_ptrs, NULL); + + // Test initialization + TestHsa::SetQueue(queue); + TestHsa::HsaInstantiate(0); + + for (unsigned ind = 0; ind < kiter; ++ind) { + printf("Iterastion %u:\n", ind); + ret_val = RunKernel(0, NULL, diter); + if (ret_val) ret_val = RunKernel(0, NULL, diter); + } + + TestHsa::HsaShutdown(); + + return (ret_val) ? 0 : 1; +} diff --git a/test/ctrl/run_kernel.h b/test/ctrl/run_kernel.h index 64aa3d3f8e..23d7ea0385 100644 --- a/test/ctrl/run_kernel.h +++ b/test/ctrl/run_kernel.h @@ -29,6 +29,9 @@ THE SOFTWARE. 
template bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) { bool ret_val = false; + if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL); + + // Create test kernel object Kernel test_kernel; TestAql* test_aql = new TestHsa(&test_kernel); diff --git a/test/ctrl/test_hsa.cpp b/test/ctrl/test_hsa.cpp index 47d0f54e4c..e321b72cac 100644 --- a/test/ctrl/test_hsa.cpp +++ b/test/ctrl/test_hsa.cpp @@ -50,15 +50,6 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) { return NULL; } std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl; - - // Create an instance of Aql Queue - if (hsa_queue_ == NULL) { - uint32_t num_pkts = 128; - if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { - hsa_queue_ = NULL; - TEST_ASSERT(false); - } - } } return hsa_rsrc_; } @@ -74,6 +65,15 @@ void TestHsa::HsaShutdown() { bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) { std::clog << "TestHsa::Initialize :" << std::endl; + // Create an instance of Aql Queue + if (hsa_queue_ == NULL) { + uint32_t num_pkts = 128; + if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { + hsa_queue_ = NULL; + TEST_ASSERT(false); + } + } + // Instantiate a Timer object setup_timer_idx_ = hsa_timer_.CreateTimer(); dispatch_timer_idx_ = hsa_timer_.CreateTimer(); @@ -222,7 +222,7 @@ bool TestHsa::Run() { // Submit AQL packet to the queue const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql); - std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl; + std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush; // Wait on the dispatch signal until the kernel is finished. 
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling @@ -283,5 +283,7 @@ void TestHsa::PrintTime() { bool TestHsa::Cleanup() { hsa_executable_destroy(hsa_exec_); hsa_signal_destroy(hsa_signal_); + hsa_queue_destroy(hsa_queue_); + hsa_queue_ = NULL; return true; } diff --git a/test/run.sh b/test/run.sh index 8d8ca8fffd..abb1ba0725 100755 --- a/test/run.sh +++ b/test/run.sh @@ -32,9 +32,17 @@ export ROCPROFILER_LOG=1 unset ROCP_PROXY_QUEUE # ROC profiler metrics config file export ROCP_METRICS=metrics.xml +# test trace +export ROC_TEST_TRACE=1 +# tool library loaded by ROC profiler +export ROCP_TOOL_LIB=./test/libintercept_test.so +../bin/run_tool.sh ./test/ctrl + +unset ROCP_TOOL_LIB eval ./test/standalone_test + # tool library loaded by ROC profiler export ROCP_TOOL_LIB=libtool.so # ROC profiler kernels timing @@ -47,16 +55,16 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then export ROCP_TOOL_LIB=test/libtool.so fi -export ROCP_KITER=100 -export ROCP_DITER=100 -export ROCP_INPUT=input.xml -eval ./test/ctrl - export ROCP_KITER=1 export ROCP_DITER=4 export ROCP_INPUT=input1.xml eval ./test/ctrl +export ROCP_KITER=100 +export ROCP_DITER=100 +export ROCP_INPUT=input.xml +eval ./test/ctrl + #valgrind --leak-check=full $tbin #valgrind --tool=massif $tbin #ms_print massif.out. diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 373f1f7bd5..2317595f5d 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -417,7 +417,8 @@ bool dump_context_entry(context_entry_t* entry) { index, entry->data.queue_index, nik_name.c_str()); - if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", + if (record) fprintf(file_handle, ", gpu-id(%u), time(%lu,%lu,%lu,%lu)", + HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, record->dispatch, record->begin, record->end,