From f730fb6a4e68117db576a347a4db39b6cea40174 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 6 Nov 2019 10:54:45 -0600 Subject: [PATCH 01/94] Revise the return type of roctracer_next_record to roctracer_status_t. --- inc/roctracer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/roctracer.h b/inc/roctracer.h index 5f469616d2..812fc464f4 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -137,7 +137,7 @@ roctracer_status_t roctracer_disable_callback(); typedef activity_record_t roctracer_record_t; // Return next record -static inline int roctracer_next_record( +static inline roctracer_status_t roctracer_next_record( const activity_record_t* record, // [in] record ptr const activity_record_t** next) // [out] next record ptr { From 719e2a42c870eb2c99f1f8d6ffa98e3e7c8dc2f0 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:14:23 -0500 Subject: [PATCH 02/94] Update CMakeLists.txt --- CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e90a4f7924..06d88d8d76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,9 +110,7 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION in install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) ## KFD wrapper -if ( DEFINED KFD_WRAPPER ) - install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) -endif () +install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) From ad5080dabc662c01fdcfd1b9734385dc64506e9c Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:23:41 -0500 Subject: [PATCH 03/94] Update env.cmake --- cmake_modules/env.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmake_modules/env.cmake b/cmake_modules/env.cmake index 
9ad3fbf23b..da09ad4f5c 100644 --- a/cmake_modules/env.cmake +++ b/cmake_modules/env.cmake @@ -50,11 +50,6 @@ else() set ( HIP_VDI 0 ) endif() -## Enable KFD wrapper -if ( DEFINED KFD_WRAPPER ) - add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} ) -endif() - ## Enable HIP/HCC local build if ( DEFINED LOCAL_BUILD ) add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} ) @@ -136,6 +131,5 @@ message ( "-------------HCC-Inc: ${HCC_INC_DIR}" ) message ( "-------------HIP-Inc: ${HIP_INC_DIR}" ) message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" ) message ( "-------------HIP-VDI: ${HIP_VDI}" ) -message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" ) message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" ) message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" ) From e177ff2c1ff7a7678a701dbf1d3a07159d7eff1d Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:24:57 -0500 Subject: [PATCH 04/94] Update CMakeLists.txt --- src/CMakeLists.txt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44b9fd81a3..7a65896baa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,17 +17,15 @@ target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" ) # Generating KFD/Thunk tracing primitives -if ( DEFINED KFD_WRAPPER ) - set ( KFD_LIB "kfdwrapper64" ) - set ( KFD_LIB_SRC - ${LIB_DIR}/kfd/kfd_wrapper.cpp - ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) - add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) - target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) - target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) - execute_process ( 
COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) -endif() +set ( KFD_LIB "kfdwrapper64" ) +set ( KFD_LIB_SRC + ${LIB_DIR}/kfd/kfd_wrapper.cpp +) +execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) +add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) +target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) +target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) +execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) set ( ROCTX_LIB "roctx64" ) set ( ROCTX_LIB_SRC From e57a2125d79a055853efe0781157e578cb8f4ba0 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:26:52 -0500 Subject: [PATCH 05/94] Update roctracer.cpp --- src/core/roctracer.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 1ae25fb885..50feac1bce 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -27,9 +27,7 @@ THE SOFTWARE. 
#include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" -#ifdef KFD_WRAPPER #include "inc/roctracer_kfd.h" -#endif #include #include @@ -567,12 +565,10 @@ PUBLIC_API const char* roctracer_op_string( return roctracer::HipLoader::Instance().ApiName(op); break; } -#if KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { return roctracer::kfd_support::GetApiName(op); break; } -#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -593,13 +589,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code( if (kind != NULL) *kind = 0; break; } -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { *op = roctracer::kfd_support::GetApiCode(str); if (kind != NULL) *kind = 0; break; } -#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")"); } @@ -612,9 +606,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; -#endif case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: @@ -631,13 +623,11 @@ static roctracer_status_t roctracer_enable_callback_fun( void* user_data) { switch (domain) { -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { roctracer::hsa_support::cb_table.set(op, callback, user_data); @@ -712,13 +702,11 @@ static roctracer_status_t roctracer_disable_callback_fun( uint32_t op) { switch (domain) { -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = 
roctracer::KfdLoader::Instance().RemoveApiCallback(op); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; @@ -1046,12 +1034,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( break; } -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { roctracer::kfd_support::intercept_KFDApiTable(); break; } -#endif case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); From f0b49ac122924f6327583f761276f0d9cf2513cd Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:27:33 -0500 Subject: [PATCH 06/94] Update CMakeLists.txt --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8f04bce451..3be85fc399 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,7 +33,7 @@ set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) add_custom_target( mytest COMMAND make -C "${TEST_DIR}/MatrixTranspose" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" - COMMAND HIP_VDI=${HIP_VDI} KFD_WRAPPER=${KFD_WRAPPER} make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" ) From 475b3619d556845fe482858eada3c885550ed447 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:28:30 -0500 Subject: [PATCH 07/94] Update MatrixTranspose.cpp --- test/MatrixTranspose_test/MatrixTranspose.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 34007cf20f..f8391d784c 100644 --- 
a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -32,9 +32,7 @@ THE SOFTWARE. #include // kfd header file -#ifdef KFD_WRAPPER #include -#endif #ifndef ITERATIONS # define ITERATIONS 101 @@ -208,7 +206,6 @@ void api_callback( fprintf(stdout, "ROCTX: \"%s\"\n", data->args.message); return; } -#ifdef KFD_WRAPPER if (domain == ACTIVITY_DOMAIN_KFD_API) { const kfd_api_data_t* data = reinterpret_cast(callback_data); fprintf(stdout, "KFD: <%s id(%u)\tcorrelation_id(%lu) %s> \n", @@ -218,7 +215,6 @@ void api_callback( (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); return; } -#endif const hip_api_data_t* data = reinterpret_cast(callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), From 3aa084a248707900a1fba611d14fbc4b7dcf9e15 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 12 Nov 2019 17:29:43 -0500 Subject: [PATCH 08/94] Update tracer_tool.cpp --- test/tool/tracer_tool.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 71da132baa..e7cab52fa2 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -37,9 +37,7 @@ THE SOFTWARE. 
#include #include #include -#ifdef KFD_WRAPPER #include -#endif #include #include #include @@ -437,7 +435,6 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { // KFD API tracing // KFD API callback function -#ifdef KFD_WRAPPER void kfd_api_callback( uint32_t domain, uint32_t cid, @@ -455,7 +452,6 @@ void kfd_api_callback( fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); } } -#endif /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -738,7 +734,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, err = pthread_create(&thread, &attr, control_thr_fun, NULL); } -#ifdef KFD_WRAPPER // Enable KFD API callbacks/activity if (trace_kfd) { kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); @@ -759,7 +754,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } printf(")\n"); } -#endif if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); From 19ad236bcf51025295080798a19f4b287bec7eb8 Mon Sep 17 00:00:00 2001 From: Rachida Kebichi Date: Tue, 12 Nov 2019 18:06:34 -0500 Subject: [PATCH 09/94] change permission to x --- script/gen_ostream_ops.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 script/gen_ostream_ops.py diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py old mode 100644 new mode 100755 From f2b6a6b35c6e847d45fdc61b722f19e3f8cc4317 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 13 Nov 2019 11:57:17 -0600 Subject: [PATCH 10/94] update: dlopen of target runtimes with NOLOAD; enabled KFD domain in explicite test --- .gitignore | 1 + src/core/loader.h | 2 +- test/MatrixTranspose_test/MatrixTranspose.cpp | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cafda6d07e..bd206b0038 100644 --- a/.gitignore +++ 
b/.gitignore @@ -8,6 +8,7 @@ b build inc/hsa_prof_str.h inc/kfd_prof_str.h +inc/kfd_ostream_ops.h test/hsa test/MatrixTranspose/MatrixTranspose test/MatrixTranspose_test/MatrixTranspose diff --git a/src/core/loader.h b/src/core/loader.h index fa2b0e62ad..9f62225d05 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -46,7 +46,7 @@ class BaseLoader : public T { private: BaseLoader() { - const int flags = RTLD_LAZY; + const int flags = RTLD_LAZY|RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); if (handle_ == NULL) { fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index f8391d784c..3e5b89c868 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -314,6 +314,8 @@ void init_tracing() { // Enable HIP activity tracing ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + // Enable KFD API tracing + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); } // Start tracing routine @@ -329,6 +331,7 @@ void stop_tracing() { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); ROCTRACER_CALL(roctracer_flush_activity()); std::cout << "# STOP #############################" << std::endl << std::flush; } From e16afba208163b486153ea50cf3225c7b879c6b5 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Thu, 14 Nov 2019 10:13:08 -0600 Subject: [PATCH 11/94] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md 
index f80fe90a2d..770ff47d46 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,10 @@ asyncronous activity records pool support. ## To build and run test ``` - ROCm-2.3 or higher is required - - cd + - Python2.7 is required. + The required modules: CppHeaderParser, argparse. + To instaLL: + sudo pip install CppHeaderParser argparse - CLone development branch of roctracer: git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer From 504b7b26c6ad1ee697f6a2bbf73fdfe96a9a1d04 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Thu, 14 Nov 2019 10:13:42 -0600 Subject: [PATCH 12/94] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 770ff47d46..8fca37a2fd 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ asyncronous activity records pool support. - ROCm-2.3 or higher is required - Python2.7 is required. The required modules: CppHeaderParser, argparse. - To instaLL: + To install: sudo pip install CppHeaderParser argparse - CLone development branch of roctracer: From 44b5860b0e2f0afe56ecaf05a8b5b6e178addd1f Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 15 Nov 2019 12:29:34 -0600 Subject: [PATCH 13/94] cosmetic change --- test/tool/tracer_tool.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index e7cab52fa2..f300c69388 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -268,6 +268,9 @@ void hsa_activity_callback( index++; } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HIP API tracing + struct hip_api_trace_entry_t { uint32_t valid; uint32_t type; @@ -282,9 +285,6 @@ struct hip_api_trace_entry_t { void* ptr; }; -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// HIP API tracing - void 
hip_api_flush_cb(hip_api_trace_entry_t* entry); roctracer::TraceBuffer::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}}; roctracer::TraceBuffer hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1); From 8e85eb1cefb5c1118f0b963738773d653a98b967 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 19 Nov 2019 23:19:11 -0600 Subject: [PATCH 14/94] global counter optimizing --- src/core/roctracer.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 50feac1bce..6a7cf115c0 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -249,18 +249,16 @@ class GlobalCounter { public: typedef std::mutex mutex_t; typedef uint64_t counter_t; + typedef std::atomic atomic_counter_t; - static counter_t Increment() { - std::lock_guard lock(mutex_); - return ++counter_; - } + static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); } private: static mutex_t mutex_; - static counter_t counter_; + static atomic_counter_t counter_; }; GlobalCounter::mutex_t GlobalCounter::mutex_; -GlobalCounter::counter_t GlobalCounter::counter_ = 0; +GlobalCounter::atomic_counter_t GlobalCounter::counter_{1}; // Records storage struct roctracer_api_data_t { @@ -282,6 +280,7 @@ typedef std::map correlati typedef std::mutex correlation_id_mutex_t; correlation_id_map_t* correlation_id_map = NULL; correlation_id_mutex_t correlation_id_mutex; +bool correlation_id_wait = false; static thread_local std::stack external_id_stack; @@ -294,6 +293,7 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) { auto it = correlation_id_map->find(correlation_id); + if (correlation_id_wait) while (it == correlation_id_map->end()) it = correlation_id_map->find(correlation_id); if (it == correlation_id_map->end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC 
activity id lookup failed(" << correlation_id << ")"); return it->second; } @@ -817,6 +817,10 @@ static roctracer_status_t roctracer_enable_activity_fun( case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { if (roctracer::HccLoader::GetRef() == NULL) { + if (getenv("ROCP_HCC_CORRID_WAIT") != NULL) { + roctracer::correlation_id_wait = true; + fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout); + } roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, (void*)roctracer::HCC_AsyncActivityCallback, (void*)pool); From a7c2d452741a2f28a797e27e8e0f38d7a95cec4b Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 20 Nov 2019 17:09:32 +0000 Subject: [PATCH 15/94] Introduce multi-GPU MatrixTranspose sample application. This sample application would launch a kernel on every available GPU, so it's possible to monitor activities on every GPU. This could be used to help verify roctracer is working properly on a multi-GPU scenario. --- test/MatrixTranspose_test_mgpu/Makefile | 40 +++ .../MatrixTranspose.cpp | 313 ++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100644 test/MatrixTranspose_test_mgpu/Makefile create mode 100644 test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp diff --git a/test/MatrixTranspose_test_mgpu/Makefile b/test/MatrixTranspose_test_mgpu/Makefile new file mode 100644 index 0000000000..da9971b371 --- /dev/null +++ b/test/MatrixTranspose_test_mgpu/Makefile @@ -0,0 +1,40 @@ +ROOT_PATH = ../.. +LIB_PATH = $(ROOT_PATH)/build +ROC_LIBS = -L$(LIB_PATH) -lroctracer64 +export LD_LIBRARY_PATH=$(LIB_PATH) +HIP_VDI ?= 0 +ITERATIONS ?= 1 + +HIP_PATH?= $(wildcard /opt/rocm/hip) +ifeq (,$(HIP_PATH)) + HIP_PATH=../../.. 
+endif + +HIPCC=$(HIP_PATH)/bin/hipcc + +TARGET=hcc + +SOURCES = MatrixTranspose.cpp +OBJECTS = $(SOURCES:.cpp=.o) + +EXECUTABLE=./MatrixTranspose + +.PHONY: test + + +all: clean $(EXECUTABLE) + +CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) +CXX=$(HIPCC) + +$(EXECUTABLE): $(OBJECTS) + $(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS) + +test: $(EXECUTABLE) + $(EXECUTABLE) + +clean: + rm -f $(EXECUTABLE) + rm -f $(OBJECTS) + rm -f $(HIP_PATH)/src/*.o + diff --git a/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp b/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp new file mode 100644 index 0000000000..ffd4c88109 --- /dev/null +++ b/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp @@ -0,0 +1,313 @@ +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +// roctracer extension API +#include + +// hip header file +#include + +#ifndef ITERATIONS +# define ITERATIONS 1 +#endif +#define WIDTH 1024 + + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +// Device (Kernel) function, it must be void +__global__ void matrixTranspose(float* out, float* in, const int width) { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + out[y * width + x] = in[x * width + y]; +} + +// CPU implementation of matrix transpose +void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + +int iterations = ITERATIONS; +void init_tracing(); +void start_tracing(); +void stop_tracing(); + +int main() { + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; + + float* gpuMatrix; + float* gpuTransposeMatrix; + + int i; + int errors; + + int gpuCount = 0; + hipGetDeviceCount(&gpuCount); + std::cout << "Number of GPUs: " << gpuCount << std::endl; + + init_tracing(); + + while (iterations-- > 0) { + start_tracing(); + + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } + + for (i = 0; i < gpuCount; ++i) { + // switch GPU. 
+ hipSetDevice(i); + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + std::cout << "Device name " << devProp.name << std::endl; + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // Memory transfer from host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + + // Lauching kernel from host + hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, + gpuMatrix, WIDTH); + + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + + hipStreamSynchronize(0); + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + } + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + errors = 0; + double eps = 1.0E-6; + for (i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + + stop_tracing(); + } + + return errors; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// HIP Callbacks/Activity tracing +// +#if 1 +#include +#include + +// Macro to check ROC-tracer calls status +#define ROCTRACER_CALL(call) \ + do { \ + int err = call; \ + if (err != 0) { \ + std::cerr << roctracer_error_string() << std::endl << std::flush; \ + abort(); \ + } \ + } while (0) + +// Runtime API callback function +void api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + 
std::cout << "### api_callback IN\n"; + (void)arg; + + //if (domain == ACTIVITY_DOMAIN_ROCTX) { + // const roctx_api_data_t* data = reinterpret_cast(callback_data); + // fprintf(stdout, "ROCTX: \"%s\"\n", data->args.message); + // return; + //} + + if (domain == ACTIVITY_DOMAIN_HCC_OPS) { + fprintf(stdout, "HCC OPS\n"); + return; + } + + if (domain == ACTIVITY_DOMAIN_HSA_API) { + fprintf(stdout, "HSA API\n"); + return; + } + + const hip_api_data_t* data = reinterpret_cast(callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + switch (cid) { + case HIP_API_ID_hipMemcpy: + fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)", + data->args.hipMemcpy.dst, + data->args.hipMemcpy.src, + (uint32_t)(data->args.hipMemcpy.sizeBytes), + (uint32_t)(data->args.hipMemcpy.kind)); + break; + case HIP_API_ID_hipMalloc: + fprintf(stdout, "ptr(%p) size(0x%x)", + data->args.hipMalloc.ptr, + (uint32_t)(data->args.hipMalloc.size)); + break; + case HIP_API_ID_hipFree: + fprintf(stdout, "ptr(%p)", + data->args.hipFree.ptr); + break; + case HIP_API_ID_hipModuleLaunchKernel: + fprintf(stdout, "kernel(\"%s\") stream(%p)", + hipKernelNameRef(data->args.hipModuleLaunchKernel.f), + data->args.hipModuleLaunchKernel.stream); + break; + default: + break; + } + } else { + switch (cid) { + case HIP_API_ID_hipMalloc: + fprintf(stdout, "*ptr(0x%p)", + *(data->args.hipMalloc.ptr)); + break; + default: + break; + } + } + fprintf(stdout, "\n"); fflush(stdout); +} + +// Activity tracing callback +// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) +void activity_callback(const char* begin, const char* end, void* arg) { + std::cout << "### activity_callback IN\n"; + const roctracer_record_t* record = reinterpret_cast(begin); + const 
roctracer_record_t* end_record = reinterpret_cast(end); + fprintf(stdout, "\tActivity records:\n"); fflush(stdout); + while (record < end_record) { + const char * name = roctracer_op_string(record->domain, record->op, record->kind); + fprintf(stdout, "\tdomain(%u)", record->domain); + fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", + name, + record->correlation_id, + record->begin_ns, + record->end_ns + ); + if (record->domain == ACTIVITY_DOMAIN_HIP_API) { + fprintf(stdout, " process_id(%u) thread_id(%u)", + record->process_id, + record->thread_id + ); + } else if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { + fprintf(stdout, " device_id(%d) queue_id(%lu)", + record->device_id, + record->queue_id + ); + if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { + fprintf(stdout, " external_id(%lu)", + record->external_id + ); + } else { + fprintf(stderr, "Bad domain %d\n", record->domain); + //abort(); + } + fprintf(stdout, "\n"); + fflush(stdout); + ROCTRACER_CALL(roctracer_next_record(record, &record)); + } +} + +// Init tracing routine +void init_tracing() { + std::cout << "# INIT #############################" << std::endl << std::flush; + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 0x1000; + properties.buffer_callback_fun = activity_callback; + properties.buffer_callback_arg = &properties; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + // Enable API callbacks + ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL)); + // Enable activity tracing + ROCTRACER_CALL(roctracer_enable_activity()); +} + +// Start tracing routine +void start_tracing() { + std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush; +} + +// Stop tracing routine +void stop_tracing() { + ROCTRACER_CALL(roctracer_disable_callback()); + + ROCTRACER_CALL(roctracer_disable_activity()); + 
ROCTRACER_CALL(roctracer_flush_activity()); + std::cout << "# STOP #############################" << std::endl << std::flush; +} +#else +void init_tracing() {} +void start_tracing() {} +void stop_tracing() {} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From ac5390cdb21d94a5e25ab7c10f0da0d1ade77e66 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 21 Nov 2019 17:26:01 -0600 Subject: [PATCH 16/94] adding rpath for tests to use teh proper profiling library --- test/MatrixTranspose/Makefile | 3 +-- test/MatrixTranspose_test/Makefile | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test/MatrixTranspose/Makefile b/test/MatrixTranspose/Makefile index daa48b2561..41727de47e 100644 --- a/test/MatrixTranspose/Makefile +++ b/test/MatrixTranspose/Makefile @@ -1,7 +1,6 @@ ROOT_PATH = ../.. LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -export LD_LIBRARY_PATH=$(LIB_PATH) +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so HIP_PATH?= $(wildcard /opt/rocm/hip) ifeq (,$(HIP_PATH)) diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index 202980b804..2a767a5626 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -1,8 +1,7 @@ ROOT_PATH = ../.. 
LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64 +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so HSA_KMT_INC_PATH ?= /opt/rocm/include -export LD_LIBRARY_PATH=$(LIB_PATH) HIP_VDI ?= 0 ITERATIONS ?= 100 From 2ee7893e6d8f715baa21078c5a1273b67c5b00ca Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Fri, 22 Nov 2019 17:49:37 -0600 Subject: [PATCH 17/94] Update README.md --- README.md | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8fca37a2fd..b4b92c6f15 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,36 @@ # ROC-tracer ``` -ROC-tracer library, Runtimes Generic Callback/Activity APIs. +ROC-tracer library: Runtimes Generic Callback/Activity APIs. The goal of the implementation is to provide a generic independent from specific runtime profiler to trace API and asyncronous activity. The API provides functionality for registering the runtimes API callbacks and asyncronous activity records pool support. + +ROC-TX librray: code annotation evemts API +Includes basic API: roctxMark, roctxRangePush, roctxRangePop. 
``` ## The library source tree ``` - - inc/roctracer.h - Library public API + - inc/roctracer.h - rocTacer library public API header + - inc/roctx.h - rocTX library puiblic API header - src - Library sources - - core - Library API sources - - util - Library utils sources + - core - rocTracer library API sources + - roctx - rocTX library API sources + - util - library utils sources - test - test suit - MatrixTranspose - test based on HIP MatrixTranspose sample ``` ## Documentation ``` - - API description: inc/roctracer.h - - Code example: test/MatrixTranspose_test/MatrixTranspose.cpp + - API description/headers: + - inc/roctracer.h + - inc/roctx.h + - Code examples: + - test/MatrixTranspose_test/MatrixTranspose.cpp + - test/MatrixTranspose/MatrixTranspose.cpp ``` ## To build and run test @@ -53,3 +62,16 @@ asyncronous activity records pool support. or make package && dpkg -i *.deb ``` + +## Usage +``` +rocTracer API: + To use the rocTracer API you need the API header and to link your application with roctracer .so librray: + - the API header: /opt/rocm/roctracer/include/roctracer.h + - the .so library: /opt/rocm/lib/libroctracer64.so + +rocTX API: + To use the rocTX API you need the API header and to link your application with rictx .so librray: + - the API header: /opt/rocm/roctracer/include/roctx.h + - the .so library: /opt/rocm/lib/libroctx64.so + From 127a6e1f43a8e0db08e7aafaf14cd00f9f0f2b4f Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Fri, 22 Nov 2019 17:50:30 -0600 Subject: [PATCH 18/94] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b4b92c6f15..9f9cf78fbc 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ rocTracer API: - the .so library: /opt/rocm/lib/libroctracer64.so rocTX API: - To use the rocTX API you need the API header and to link your application with rictx .so librray: + To use the rocTX API you need the API header and to 
link your application with roctx .so librray: - the API header: /opt/rocm/roctracer/include/roctx.h - the .so library: /opt/rocm/lib/libroctx64.so From a0a993bf0d408f51e4d3d0f7a6d6ff9371bababe Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 25 Nov 2019 16:16:47 -0600 Subject: [PATCH 19/94] fix RPM packaging, setting rpm_post as RPM_POST_INSTALL --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06d88d8d76..cda9887338 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,7 +127,7 @@ set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" ) set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" ) ## RPM package specific variables -set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) +set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" ) include ( CPack ) From 74bf95082a0c14d477a895824bc36550112b4baa Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 25 Nov 2019 16:17:29 -0600 Subject: [PATCH 20/94] adding roctx instrumentation to MatrixTranspose test --- test/MatrixTranspose/Makefile | 2 +- test/MatrixTranspose/MatrixTranspose.cpp | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/MatrixTranspose/Makefile b/test/MatrixTranspose/Makefile index 41727de47e..63fae09943 100644 --- a/test/MatrixTranspose/Makefile +++ b/test/MatrixTranspose/Makefile @@ -1,6 +1,6 @@ ROOT_PATH = ../.. 
LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so HIP_PATH?= $(wildcard /opt/rocm/hip) ifeq (,$(HIP_PATH)) diff --git a/test/MatrixTranspose/MatrixTranspose.cpp b/test/MatrixTranspose/MatrixTranspose.cpp index d2ecfb8484..264cf2d93b 100644 --- a/test/MatrixTranspose/MatrixTranspose.cpp +++ b/test/MatrixTranspose/MatrixTranspose.cpp @@ -23,8 +23,10 @@ THE SOFTWARE. #include // hip header file -#include "hip/hip_runtime.h" +#include #include "roctracer_ext.h" +// roctx header file +#include #define WIDTH 1024 @@ -94,15 +96,23 @@ int main() { hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); roctracer_mark("before HIP LaunchKernel"); + roctxMark("before hipLaunchKernel"); + roctxRangePush("hipLaunchKernel"); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); roctracer_mark("after HIP LaunchKernel"); + roctxMark("after hipLaunchKernel"); // Memory transfer from device to host + roctxRangePush("hipMemcpy"); + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" + // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); From e09d753ef794cf258aa34381cc1c57480564b9f5 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 26 Nov 2019 15:28:54 -0600 Subject: [PATCH 21/94] enabling of KFD API callbacks to MatricTranspose_test --- test/MatrixTranspose_test/MatrixTranspose.cpp | 5 +++-- test/run.sh | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 3e5b89c868..57ab83ef18 100644 --- 
a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -203,12 +203,12 @@ void api_callback( if (domain == ACTIVITY_DOMAIN_ROCTX) { const roctx_api_data_t* data = reinterpret_cast(callback_data); - fprintf(stdout, "ROCTX: \"%s\"\n", data->args.message); + fprintf(stdout, "\n", data->args.message); return; } if (domain == ACTIVITY_DOMAIN_KFD_API) { const kfd_api_data_t* data = reinterpret_cast(callback_data); - fprintf(stdout, "KFD: <%s id(%u)\tcorrelation_id(%lu) %s> \n", + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> \n", roctracer_op_string(ACTIVITY_DOMAIN_KFD_API, cid, 0), cid, data->correlation_id, @@ -315,6 +315,7 @@ void init_tracing() { ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); // Enable KFD API tracing + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); } diff --git a/test/run.sh b/test/run.sh index af56efe5f3..4a824aae37 100755 --- a/test/run.sh +++ b/test/run.sh @@ -65,7 +65,7 @@ eval_test() { # Standalone test # rocTrecer is used explicitely by test -eval_test "standalone HIP test" "./test/MatrixTranspose_test" +eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" # Tool test # rocTracer/tool is loaded by HSA runtime From 0560d99d0fc9d0c356562dd09dc5aee78094d806 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 27 Nov 2019 19:10:53 -0600 Subject: [PATCH 22/94] hip_vdi: disabling calllback/activity disable --- src/core/loader.h | 6 +++--- src/core/roctracer.cpp | 6 +++++- test/MatrixTranspose_test/MatrixTranspose.cpp | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/core/loader.h b/src/core/loader.h index 9f62225d05..c7e95734b2 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -110,9 +110,9 @@ class HccApi { 
protected: void init(Loader* loader) { #if HIP_VDI - InitActivityCallback = loader->GetFun("InitActivityCallback"); - EnableActivityCallback = loader->GetFun("EnableActivityCallback"); - GetOpName = loader->GetFun("GetCmdName"); + InitActivityCallback = loader->GetFun("hipInitActivityCallback"); + EnableActivityCallback = loader->GetFun("hipEnableActivityCallback"); + GetOpName = loader->GetFun("hipGetCmdName"); #else InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 6a7cf115c0..0259c83e31 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -711,8 +711,10 @@ static roctracer_status_t roctracer_disable_callback_fun( case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { +#if !HIP_VDI hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")"); +#endif break; } case ACTIVITY_DOMAIN_ROCTX: { @@ -900,8 +902,10 @@ static roctracer_status_t roctracer_disable_activity_fun( break; } case ACTIVITY_DOMAIN_HIP_API: { +#if !HIP_VDI const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")"); +#endif break; } case ACTIVITY_DOMAIN_ROCTX: break; @@ -1052,7 +1056,7 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( } case ACTIVITY_DOMAIN_HCC_OPS: case ACTIVITY_DOMAIN_HIP_API: { -#ifdef HIP_VDI +#if HIP_VDI const char* hip_lib_name = "libamdhip64.so"; roctracer::HccLoader::SetLibName(hip_lib_name); roctracer::HipLoader::SetLibName(hip_lib_name); diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 
57ab83ef18..1a7b2cb97a 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -317,6 +317,8 @@ void init_tracing() { // Enable KFD API tracing ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); + // Enable rocTX + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL)); } // Start tracing routine From 3bc5beb48063c118a158e2c347a19bfbdffc0dbd Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 27 Nov 2019 19:16:27 -0600 Subject: [PATCH 23/94] adding sys|roctx test --- test/run.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/run.sh b/test/run.sh index 4a824aae37..ccf646aed6 100755 --- a/test/run.sh +++ b/test/run.sh @@ -70,12 +70,15 @@ eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTran # Tool test # rocTracer/tool is loaded by HSA runtime export HSA_TOOLS_LIB="test/libtracer_tool.so" -export ROCTRACER_DOMAIN="hip" -# HIP test -eval_test "tool HIP test" ./test/MatrixTranspose -# with trace sampling control -eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" +# SYS test +export ROCTRACER_DOMAIN="sys:roctx" +eval_test "tool SYS test" ./test/MatrixTranspose +export ROCTRACER_DOMAIN="sys:hsa:roctx" +eval_test "tool SYS/HSA test" ./test/MatrixTranspose +# Tracing control +export ROCTRACER_DOMAIN="hip" +eval_test "tool period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" # HSA test export ROCTRACER_DOMAIN="hsa" From 7138c5787b7315c921ff743998ffa4ca97a82191 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 27 Nov 2019 19:39:32 -0600 Subject: [PATCH 24/94] enabling hip id wait for HIP-VDI --- src/core/roctracer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 0259c83e31..f0661088c0 
100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -1060,6 +1060,7 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( const char* hip_lib_name = "libamdhip64.so"; roctracer::HccLoader::SetLibName(hip_lib_name); roctracer::HipLoader::SetLibName(hip_lib_name); + roctracer::correlation_id_wait = true; #endif mark_api_callback_ptr = reinterpret_cast(properties); break; From 8475e25e7acfe2ac2c93823f6b1bca9c0263af90 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 3 Dec 2019 09:43:50 -0600 Subject: [PATCH 25/94] kfd trace fix: disbale recursive callback; disable open/close API tracing; --- script/kfdap.py | 5 ++--- test/tool/tracer_tool.cpp | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/script/kfdap.py b/script/kfdap.py index d822cf2752..c46bf60d78 100755 --- a/script/kfdap.py +++ b/script/kfdap.py @@ -406,8 +406,7 @@ class API_DescrParser: call_id = self.api_id[call]; ret_type = struct['ret'] self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' + - if call == 'hsaKmtOpenKFD': - self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n' + self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n' self.content_h += ' kfd_api_data_t api_data{};\n' for var in struct['alst']: self.content_h += ' api_data.args.' + call + '.' 
+ var.replace("[]","") + ' = ' + var.replace("[]","") + ';\n' @@ -525,7 +524,7 @@ class API_DescrParser: self.content_cpp += ' return true;\n'; self.content_cpp += '}\n\n'; - if call != '-': + if call != '-' and call != 'hsaKmtCloseKFD' and call != 'hsaKmtOpenKFD': self.content_cpp += 'PUBLIC_API ' + struct['ret'] + " " + call + '(' + struct['args'] + ') { return roctracer::kfd_support::' + call + '_callback(' for i in range(0,len(struct['alst'])): if i == (len(struct['alst'])-1): diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index f300c69388..ad1e9ee3f7 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -435,6 +435,7 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { // KFD API tracing // KFD API callback function +static thread_local bool in_kfd_api_callback = false; void kfd_api_callback( uint32_t domain, uint32_t cid, @@ -442,6 +443,8 @@ void kfd_api_callback( void* arg) { (void)arg; + if (in_kfd_api_callback) return; + in_kfd_api_callback = true; const kfd_api_data_t* data = reinterpret_cast(callback_data); if (data->phase == ACTIVITY_API_PHASE_ENTER) { kfd_begin_timestamp = timer->timestamp_fn_ns(); @@ -451,6 +454,7 @@ void kfd_api_callback( os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); } + in_kfd_api_callback = false; } /////////////////////////////////////////////////////////////////////////////////////////////////////// From 5a76e40d43178989672d6689168c57b92d5e8727 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Wed, 4 Dec 2019 16:34:23 -0600 Subject: [PATCH 26/94] Update LICENSE --- LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE b/LICENSE index 9e78331e70..8384c9857d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. 
+[MITx11 License] Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 7b3ce9a6162660c028f8ccad6fc7b5131e09f74f Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 5 Dec 2019 11:22:47 -0600 Subject: [PATCH 27/94] porting to clang --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3be85fc399..923384c444 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -49,7 +49,7 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout 7defb6d; fi" ) +execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout a3aabb5; fi" ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa ) From 76f85f8265253738c3bdd8387ac444d4c1a076b1 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 5 Dec 2019 17:59:31 -0600 Subject: [PATCH 28/94] onload trace --- test/tool/tracer_tool.cpp | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index ad1e9ee3f7..92fdbf7a52 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -57,9 +57,12 @@ THE SOFTWARE. 
} \ } while (0) -#ifndef onload_debug -#define onload_debug false -#endif +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ + } while(0); +#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") +#define ONLOAD_TRACE_END() ONLOAD_TRACE("end") typedef hsa_rt_utils::Timer::timestamp_t timestamp_t; hsa_rt_utils::Timer* timer = NULL; @@ -529,7 +532,7 @@ void close_output_file(FILE* file_handle) { // HSA-runtime tool on-load method extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); // Output file @@ -759,15 +762,15 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } - if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); } // tool unload method void tool_unload(bool destruct) { static bool is_unloaded = false; + ONLOAD_TRACE("begin (" << destruct <<", " << is_unloaded << ")"); - if (onload_debug) { printf("TOOL tool_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); } if (destruct == false) return; if (is_unloaded == true) return; is_unloaded = true; @@ -806,21 +809,21 @@ void tool_unload(bool destruct) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); fclose(kfd_api_file_handle); } - if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } // HSA-runtime on-unload method extern "C" PUBLIC_API void OnUnload() { - if (onload_debug) { printf("TOOL OnUnload\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); tool_unload(false); - if (onload_debug) { 
printf("TOOL OnUnload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } extern "C" CONSTRUCTOR_API void constructor() { - if (onload_debug) { printf("TOOL constructor ...end\n"); fflush(stdout); } + ONLOAD_TRACE("") } extern "C" DESTRUCTOR_API void destructor() { - if (onload_debug) { printf("TOOL destructor\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); tool_unload(true); - if (onload_debug) { printf("TOOL destructor end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } From 9d375424704d8b6c71e5dfa2e560f84562988649 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 5 Dec 2019 18:08:17 -0600 Subject: [PATCH 29/94] lib onload trace --- src/core/roctracer.cpp | 34 +++++++++++++++++++--------------- test/tool/tracer_tool.cpp | 2 +- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index f0661088c0..8eac5a1f20 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -85,9 +85,13 @@ THE SOFTWARE. (void)err; \ return X; -#ifndef onload_debug -#define onload_debug false -#endif +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ + } while(0); +#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") +#define ONLOAD_TRACE_END() ONLOAD_TRACE("end") + static inline uint32_t GetPid() { return syscall(__NR_getpid); } @@ -1080,55 +1084,55 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( // HSA-runtime tool on-load method PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - if (onload_debug) { printf("LIB roctracer_load\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); static bool is_loaded = false; if (is_loaded) return true; is_loaded = true; - if (onload_debug) { printf("LIB roctracer_load end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); return true; } PUBLIC_API void roctracer_unload(bool 
destruct) { static bool is_unloaded = false; + ONLOAD_TRACE("begin (" << destruct << ", " << is_unloaded << ")"); - if (onload_debug) { printf("LIB roctracer_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); } if (destruct == false) return; if (is_unloaded == true) return; is_unloaded = true; roctracer::trace_buffer.Flush(); roctracer::close_output_file(roctracer::kernel_file_handle); - if (onload_debug) { printf("LIB roctracer_unload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - if (onload_debug) { printf("LIB OnLoad\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); - if (onload_debug) { printf("LIB OnLoad end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); return ret; } PUBLIC_API void OnUnload() { - if (onload_debug) { printf("LIB OnUnload\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); roctracer_unload(false); - if (onload_debug) { printf("LIB OnUnload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } CONSTRUCTOR_API void constructor() { - if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; - if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } DESTRUCTOR_API void destructor() { - if (onload_debug) { printf("LIB destructor\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); roctracer_unload(true); util::HsaRsrcFactory::Destroy(); roctracer::util::Logger::Destroy(); - if (onload_debug) { printf("LIB destructor end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } } // extern "C" diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp 
index 92fdbf7a52..be2c913160 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -59,7 +59,7 @@ THE SOFTWARE. #define ONLOAD_TRACE(str) \ if (getenv("ROCP_ONLOAD_TRACE")) do { \ - std::cout << "PID(" << GetPid() << "): TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ + std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ } while(0); #define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") #define ONLOAD_TRACE_END() ONLOAD_TRACE("end") From c58f1f411c96c60f224d8c79dabda87a4cea427d Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 5 Dec 2019 18:13:30 -0600 Subject: [PATCH 30/94] hcc corr id wait enabled by default --- src/core/roctracer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 8eac5a1f20..ed9cb4d2dc 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -284,7 +284,7 @@ typedef std::map correlati typedef std::mutex correlation_id_mutex_t; correlation_id_map_t* correlation_id_map = NULL; correlation_id_mutex_t correlation_id_mutex; -bool correlation_id_wait = false; +bool correlation_id_wait = true; static thread_local std::stack external_id_stack; @@ -827,6 +827,10 @@ static roctracer_status_t roctracer_enable_activity_fun( roctracer::correlation_id_wait = true; fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout); } + if (getenv("ROCP_HCC_CORRID_NOWAIT") != NULL) { + roctracer::correlation_id_wait = false; + fprintf(stdout, "roctracer: HCC correlation ID wait disabled\n"); fflush(stdout); + } roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, (void*)roctracer::HCC_AsyncActivityCallback, (void*)pool); @@ -1064,7 +1068,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( const char* hip_lib_name = "libamdhip64.so"; roctracer::HccLoader::SetLibName(hip_lib_name); 
roctracer::HipLoader::SetLibName(hip_lib_name); - roctracer::correlation_id_wait = true; #endif mark_api_callback_ptr = reinterpret_cast(properties); break; From e606af1499bdbb9c7e0ca82563f1bf31a4bfcea0 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 8 Dec 2019 12:11:50 -0600 Subject: [PATCH 31/94] corremation id initialization fix --- src/core/roctracer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index ed9cb4d2dc..f9f87557f3 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -332,6 +332,7 @@ void* HIP_SyncActivityCallback( data = &(top.data.hip); data_ptr = const_cast(data); data_ptr->phase = phase; + data_ptr->correlation_id = 0; } // Filing record info From 0316b23df0ba7edd4c6e2afe6407dea82bf88fc1 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 8 Dec 2019 12:38:19 -0600 Subject: [PATCH 32/94] enabling of trace disable for HIP-VDI --- src/core/roctracer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index f9f87557f3..07a998db11 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -716,10 +716,8 @@ static roctracer_status_t roctracer_disable_callback_fun( case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { -#if !HIP_VDI hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")"); -#endif break; } case ACTIVITY_DOMAIN_ROCTX: { @@ -911,10 +909,8 @@ static roctracer_status_t roctracer_disable_activity_fun( break; } case ACTIVITY_DOMAIN_HIP_API: { -#if !HIP_VDI const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")"); -#endif break; } case ACTIVITY_DOMAIN_ROCTX: break; From 
b5b45f4414efa906fcd3ce60fa0c695767481b29 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 11 Dec 2019 13:57:23 -0600 Subject: [PATCH 33/94] adding delay -1 to rate controll as trace start disabled --- test/tool/tracer_tool.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index be2c913160..549debe97a 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -717,11 +717,9 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint32_t ctrl_delay = 0; uint32_t ctrl_len = 0; uint32_t ctrl_rate = 0; - int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); - if (ret != 3) { - fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str); - abort(); - } + + sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); + if (ctrl_len > ctrl_rate) { fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate); abort(); @@ -730,15 +728,18 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, control_len_us = ctrl_len; control_delay_us = ctrl_delay; - fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); - roctracer_stop(); - pthread_t thread; - pthread_attr_t attr; - int err = pthread_attr_init(&attr); - if (err) { errno = err; perror("pthread_attr_init"); abort(); } - err = pthread_create(&thread, &attr, control_thr_fun, NULL); + if (ctrl_delay != UINT32_MAX) { + fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, control_thr_fun, NULL); + } else { + 
fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout); + } } // Enable KFD API callbacks/activity From 23a1d54acf346b111453bee59cac78d3dd407a30 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 12 Dec 2019 19:03:26 -0600 Subject: [PATCH 34/94] renaming libmcwamp_hsa to libmcwamp; force loading of roctx library; --- src/core/loader.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/core/loader.h b/src/core/loader.h index c7e95734b2..1d85a31787 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -46,7 +46,7 @@ class BaseLoader : public T { private: BaseLoader() { - const int flags = RTLD_LAZY|RTLD_NOLOAD; + const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); if (handle_ == NULL) { fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); @@ -61,6 +61,8 @@ class BaseLoader : public T { if (handle_ != NULL) dlclose(handle_); } + static bool to_load_; + static mutex_t mutex_; static const char* lib_name_; static std::atomic instance_; @@ -171,9 +173,11 @@ typedef BaseLoader RocTxLoader; #define LOADER_INSTANTIATE() \ template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ + template bool roctracer::BaseLoader::to_load_ = false; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ - template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \ + template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ - template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; + template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ + template<> bool roctracer::RocTxLoader::to_load_ = true; #endif // SRC_CORE_LOADER_H_ From f4e3e457d9a5780e4906ec229fa06f1457571ed2 Mon Sep 17 00:00:00 2001 From: 
Evgeny Date: Wed, 18 Dec 2019 19:16:34 -0600 Subject: [PATCH 35/94] removing private header roctracer_roctx.h from release package --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cda9887338..48efe39b37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,7 +106,6 @@ add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} install ( TARGETS "roctx64" LIBRARY DESTINATION lib ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) ## KFD wrapper From 0e8c410ef3382f40b111439bcd17822f7c390ba8 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Wed, 18 Dec 2019 19:53:25 -0600 Subject: [PATCH 36/94] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f9cf78fbc..79a5828a62 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,10 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer - Set environment: + export CMAKE_PREFIX_PATH=/opt/rocm + - To use custom HIP/HCC versions: export HIP_PATH=/opt/rocm/hip export HCC_HOME=/opt/rocm/hcc - export CMAKE_PREFIX_PATH=/opt/rocm - Build ROCtracer export CMAKE_BUILD_TYPE= # release by default From aae6ab64bba288424929e3809698de15ef335b86 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Wed, 18 Dec 2019 19:55:16 -0600 Subject: [PATCH 37/94] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 79a5828a62..f4965b92f8 100644 --- a/README.md +++ b/README.md @@ -50,15 +50,15 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. 
export HIP_PATH=/opt/rocm/hip export HCC_HOME=/opt/rocm/hcc - - Build ROCtracer + - To build roctracer library: export CMAKE_BUILD_TYPE= # release by default cd /roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j - - To build and run test + - To build and run test: make mytest run.sh - - To install + - To install: make install or make package && dpkg -i *.deb From b7aa8fedb73d6564378b2fc5ffb76f830429fb7b Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Thu, 19 Dec 2019 18:03:40 +0000 Subject: [PATCH 38/94] Introduce a new API roctracer_get_timestamp(). roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp); Get system timestamp for roctracer clients. The API could be used to help roctracer clients understand the reference frame of timestamps when receiving activity callbacks, as the nanoseconds reported in the activity callbacks are not in the same reference frame as CPU walltime clock. --- inc/roctracer.h | 4 ++++ src/core/roctracer.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/inc/roctracer.h b/inc/roctracer.h index 812fc464f4..ce9b31add9 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -225,6 +225,10 @@ bool roctracer_load( void roctracer_unload(bool destruct); +// Get system timestamp. 
+roctracer_status_t roctracer_get_timestamp( + uint64_t* timestamp); + #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 07a998db11..87b0ace7ff 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -1106,6 +1106,12 @@ PUBLIC_API void roctracer_unload(bool destruct) { ONLOAD_TRACE_END(); } +PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) { + API_METHOD_PREFIX + *timestamp = util::HsaRsrcFactory::Instance().TimestampNs(); + API_METHOD_SUFFIX +} + PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { ONLOAD_TRACE_BEG(); From dceeb6846eafa0c369441bea6dff4841d0f067fc Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 25 Dec 2019 05:51:19 -0600 Subject: [PATCH 39/94] checking hip/hcc loader enabled --- src/core/loader.h | 6 +++++- src/core/roctracer.cpp | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/core/loader.h b/src/core/loader.h index 1d85a31787..43b07be905 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -48,7 +48,7 @@ class BaseLoader : public T { BaseLoader() { const int flags = (to_load_ == true) ? 
RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); - if (handle_ == NULL) { + if ((to_check_ == true) && (handle_ == NULL)) { fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); abort(); } @@ -62,6 +62,7 @@ class BaseLoader : public T { } static bool to_load_; + static bool to_check_; static mutex_t mutex_; static const char* lib_name_; @@ -174,8 +175,11 @@ typedef BaseLoader RocTxLoader; template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ template bool roctracer::BaseLoader::to_load_ = false; \ + template bool roctracer::BaseLoader::to_check_ = true; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ + template<> bool roctracer::HipLoader::to_check_ = false; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ + template<> bool roctracer::HccLoader::to_check_ = false; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ template<> bool roctracer::RocTxLoader::to_load_ = true; diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 07a998db11..eea72bf12d 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -640,6 +640,8 @@ static roctracer_status_t roctracer_enable_callback_fun( } case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")"); break; @@ -716,6 +718,8 @@ static roctracer_status_t roctracer_disable_callback_fun( case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { + if 
(roctracer::HipLoader::Instance().Enabled() == false) break; + hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")"); break; @@ -821,7 +825,10 @@ static roctracer_status_t roctracer_enable_activity_fun( case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { - if (roctracer::HccLoader::GetRef() == NULL) { + const bool init_phase = (roctracer::HccLoader::GetRef() == NULL); + if (roctracer::HccLoader::Instance().Enabled() == false) break; + + if (init_phase == true) { if (getenv("ROCP_HCC_CORRID_WAIT") != NULL) { roctracer::correlation_id_wait = true; fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout); @@ -839,6 +846,8 @@ static roctracer_status_t roctracer_enable_activity_fun( break; } case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")"); break; @@ -904,11 +913,15 @@ static roctracer_status_t roctracer_disable_activity_fun( case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { + if (roctracer::HccLoader::Instance().Enabled() == false) break; + const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")"); break; } case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); if 
(hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")"); break; From dd69b522c3af93a3acc1679ff4a5e7bfb3995503 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Fri, 27 Dec 2019 12:14:27 -0500 Subject: [PATCH 40/94] Update gen_ostream_ops.py --- script/gen_ostream_ops.py | 68 ++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py index f98a83b35b..fe3c86e364 100644 --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -76,7 +76,7 @@ HEADER = \ '\n' structs_done = {} -def process_struct(f,c,cppHeader,nname): +def process_struct(f,c,cppHeader,nname,apiname): if c not in cppHeader.classes: return @@ -104,9 +104,9 @@ def process_struct(f,c,cppHeader,nname): if mtype != "" and "union" not in mtype: if array_size == "": - str = " roctracer::kfd_support::output_streamer<"+mtype+">::put(out,v."+name+");\n" + str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+">::put(out,v."+name+");\n" else: - str = " roctracer::kfd_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n" + str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n" if nname != "" and nname not in str: #print("injecting ",nname, "in ", str) @@ -115,11 +115,11 @@ def process_struct(f,c,cppHeader,nname): f.write(str) else: nc = prop+"::" - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) nc = prop+"::"+mtype+" " - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) nc = c+"::" - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) def gen_cppheader(infilepath,outfilepath): @@ -128,41 +128,53 @@ def gen_cppheader(infilepath,outfilepath): except CppHeaderParser.CppParseError as e: print(e) 
sys.exit(1) - + mpath = os.path.dirname(outfilepath) + if mpath == "": + mpath = os.getcwd() + apiname = outfilepath.replace(mpath+"/","") + apiname = apiname.replace("_ostream_ops.h","") + apiname = apiname.upper() f = open(outfilepath,"w+") + f2 = open(mpath + "/basic_ostream_ops.h","w+") f.write("// automatically generated\n") - f.write(LICENSE) - f.write("\n") + f2.write("// automatically generated\n") + f.write(LICENSE + '\n') + f2.write(LICENSE + '\n') HEADER_S = \ - '#ifndef INC_KFD_OSTREAM_OPS_H_\n' + \ - '#define INC_KFD_OSTREAM_OPS_H_\n' + \ + '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ + '#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ '#include \n' + \ '\n' + \ - '#include "roctracer.h"\n' + \ - '#include "hsakmt.h"\n' + '#include "roctracer.h"\n' + if apiname == "KFD": + HEADER_S += '#include "hsakmt.h"\n' + if apiname == "HSA": + HEADER_S += '#include \n#include \n#include \n #include "cb_table.h"\n' f.write(HEADER_S) f.write('\n') f.write('namespace roctracer {\n') - f.write('namespace kfd_support {\n') - f.write('// begin ostream ops for KFD \n') - f.write(HEADER) + f.write('namespace ' + apiname.lower() + '_support {\n') + f.write('// begin ostream ops for '+ apiname + ' \n') + f.write('#include "basic_ostream_ops.h"' + '\n') + f2.write(HEADER) for c in cppHeader.classes: if "union" in c: continue - f.write("\ntemplate<>\n") - f.write("struct output_streamer<"+c+"&> {\n") - f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n") - f.write("{\n") - process_struct(f,c,cppHeader,"") - f.write(" return out;\n") - f.write("}\n") - f.write("};\n") + if len(cppHeader.classes[c]["properties"]["public"])!=0: + f.write("\ntemplate<>\n") + f.write("struct output_streamer<"+c+"&> {\n") + f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n") + f.write("{\n") + process_struct(f,c,cppHeader,"",apiname) + f.write(" return out;\n") + f.write("}\n") + f.write("};\n") FOOTER = \ - '// end ostream ops for KFD \n' + '// 
end ostream ops for '+ apiname + ' \n' FOOTER += '};};\n' + \ '\n' + \ - '#endif // INC_KFD_OSTREAM_OPS_H_\n' + \ + '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ ' \n' FOOTER2 = '\n\n' + \ '#endif // INC_BASIC_OSTREAM_OPS_H_\n' + \ @@ -170,6 +182,10 @@ def gen_cppheader(infilepath,outfilepath): f.write(FOOTER) f.close() + f2.close() + print('File ' + outfilepath + ' generated') + print('File ' + mpath + '/basic_ostream_ops.h generated') + return parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.') From 077f8ec6b6f0a465de8df24c2110c0196c6a5e30 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 26 Dec 2019 06:40:49 -0600 Subject: [PATCH 41/94] normalizing C API --- inc/ext/prof_protocol.h | 4 +- inc/roctracer.h | 69 ++++++++++---- inc/roctracer_ext.h | 4 +- inc/roctracer_hcc.h | 24 ++--- inc/roctracer_hip.h | 2 +- inc/roctracer_kfd.h | 4 +- inc/roctracer_roctx.h | 11 +-- script/kfdap.py | 8 +- src/core/roctracer.cpp | 16 ++-- test/CMakeLists.txt | 2 + test/MatrixTranspose_test/Makefile | 24 +++-- test/MatrixTranspose_test/MatrixTranspose.c | 1 + test/MatrixTranspose_test/MatrixTranspose.cpp | 91 +++++++++++-------- test/run.sh | 1 + test/tool/tracer_tool.cpp | 4 +- 15 files changed, 162 insertions(+), 103 deletions(-) create mode 120000 test/MatrixTranspose_test/MatrixTranspose.c diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 6d9cd62714..ee52e91082 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -60,7 +60,7 @@ typedef enum { typedef uint64_t activity_correlation_id_t; // Activity record type -struct activity_record_t { +typedef struct activity_record_s { uint32_t domain; // activity domain id activity_kind_t kind; // activity kind activity_op_t op; // activity op @@ -81,7 +81,7 @@ struct activity_record_t { }; }; size_t bytes; // data size bytes -}; +} activity_record_t; // Activity sync calback type typedef void* 
(*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); diff --git a/inc/roctracer.h b/inc/roctracer.h index ce9b31add9..0fc6df0063 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -38,10 +38,13 @@ THE SOFTWARE. #include #include +#ifndef __cplusplus +#include +#endif #include "ext/prof_protocol.h" -#define ROCTRACER_VERSION_MAJOR 1 +#define ROCTRACER_VERSION_MAJOR 2 #define ROCTRACER_VERSION_MINOR 0 #ifdef __cplusplus @@ -89,7 +92,7 @@ roctracer_status_t roctracer_op_code( uint32_t domain, // tracing domain const char* str, // [in] op string uint32_t* op, // [out] op code - uint32_t* kind = NULL); // [out] op kind code + uint32_t* kind); // [out] op kind code if not NULL //////////////////////////////////////////////////////////////////////////////// // Callback API @@ -172,31 +175,59 @@ typedef void roctracer_pool_t; // Create tracer memory pool // The first invocation sets the default pool -roctracer_status_t roctracer_open_pool( +roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, // tracer pool properties - roctracer_pool_t** pool = NULL); // [out] returns tracer pool if not NULL, + roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, // otherwise sets the default one if it is not set yet +roctracer_status_t roctracer_open_pool( + const roctracer_properties_t* properties) // tracer pool properties +{ + return roctracer_open_pool_expl(properties, NULL); +} // otherwise the error is generated // Close tracer memory pool -roctracer_status_t roctracer_close_pool( - roctracer_pool_t* pool = NULL); // [in] memory pool, NULL is a default one +roctracer_status_t roctracer_close_pool_expl( + roctracer_pool_t* pool); // [in] memory pool, NULL is a default one +roctracer_status_t roctracer_close_pool() +{ + return roctracer_close_pool_expl(NULL); +} // Return current default pool // Set new default pool if the argument is not NULL -roctracer_pool_t* roctracer_default_pool( - 
roctracer_pool_t* pool = NULL); // [in] new default pool if not NULL +roctracer_pool_t* roctracer_default_pool_expl( + roctracer_pool_t* pool); // [in] new default pool if not NULL +roctracer_pool_t* roctracer_default_pool() +{ + return roctracer_default_pool_expl(NULL); +} // Enable activity records logging -roctracer_status_t roctracer_enable_op_activity( +roctracer_status_t roctracer_enable_op_activity_expl( activity_domain_t domain, // tracing domain uint32_t op, // activity op ID - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_domain_activity( + roctracer_pool_t* pool); // memory pool, NULL is a default one +roctracer_status_t roctracer_enable_op_activity( activity_domain_t domain, // tracing domain - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_activity( - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one + uint32_t op) // activity op ID +{ + return roctracer_enable_op_activity_expl(domain, op, NULL); +} +roctracer_status_t roctracer_enable_domain_activity_expl( + activity_domain_t domain, // tracing domain + roctracer_pool_t* pool); // memory pool, NULL is a default one +roctracer_status_t roctracer_enable_domain_activity( + activity_domain_t domain) // tracing domain +{ + return roctracer_enable_domain_activity_expl(domain, NULL); +} +roctracer_status_t roctracer_enable_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL is a default one +roctracer_status_t roctracer_enable_activity() +{ + return roctracer_enable_activity_expl(NULL); +} // Disable activity records logging roctracer_status_t roctracer_disable_op_activity( @@ -207,8 +238,12 @@ roctracer_status_t roctracer_disable_domain_activity( roctracer_status_t roctracer_disable_activity(); // Flush available activity records -roctracer_status_t roctracer_flush_activity( - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one 
+roctracer_status_t roctracer_flush_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL is a default one +roctracer_status_t roctracer_flush_activity() +{ + return roctracer_flush_activity_expl(NULL); +} // Load/Unload methods // Set properties @@ -216,7 +251,7 @@ roctracer_status_t roctracer_set_properties( roctracer_domain_t domain, // tracing domain void* propertes); // tracing properties -struct HsaApiTable; +typedef struct HsaApiTable HsaApiTable; bool roctracer_load( HsaApiTable* table, uint64_t runtime_version, diff --git a/inc/roctracer_ext.h b/inc/roctracer_ext.h index c2f5c54542..2427336c7b 100644 --- a/inc/roctracer_ext.h +++ b/inc/roctracer_ext.h @@ -63,8 +63,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr // Notifies that the calling thread is leaving an external API region. // Pop an external correlation id for the calling thread. -// 'lastId' returns the last external correlation -roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id = NULL); +// 'lastId' returns the last external correlation if not NULL +roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id); #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_hcc.h b/inc/roctracer_hcc.h index 252b984d2a..0781460145 100644 --- a/inc/roctracer_hcc.h +++ b/inc/roctracer_hcc.h @@ -23,26 +23,22 @@ THE SOFTWARE. 
#ifndef INC_ROCTRACER_HCC_H_ #define INC_ROCTRACER_HCC_H_ -#if HIP_VDI -#define HIP_OP_ID_NUMBER 3 -#define HIP_OP_ID_COPY 1 +enum { + HIP_OP_ID_DISPATCH = 0, + HIP_OP_ID_COPY = 1, + HIP_OP_ID_BARRIER = 2, + HIP_OP_ID_NUMBER = 3 +}; + +#ifdef __cplusplus extern "C" { +#endif typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); typedef const char* (hipGetOpName_t)(unsigned op); +#ifdef __cplusplus } -#else // !HIP_VDI -#if LOCAL_BUILD -#include -#else -#include #endif -#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER -#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY -typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t; -typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t; -typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t; -#endif // !HIP_VDI #include "roctracer.h" diff --git a/inc/roctracer_hip.h b/inc/roctracer_hip.h index d365dd9444..28e4868d59 100644 --- a/inc/roctracer_hip.h +++ b/inc/roctracer_hip.h @@ -33,7 +33,7 @@ extern "C" { #endif // __cplusplus // Traced calls ID enumeration -typedef hip_api_id_t roctracer_hip_api_cid_t; +typedef enum hip_api_id_t roctracer_hip_api_cid_t; #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_kfd.h b/inc/roctracer_kfd.h index 45113ce435..fcc1e3cd87 100644 --- a/inc/roctracer_kfd.h +++ b/inc/roctracer_kfd.h @@ -23,11 +23,11 @@ THE SOFTWARE. 
///////////////////////////////////////////////////////////////////////////// #ifndef INC_ROCTRACER_KFD_H_ #define INC_ROCTRACER_KFD_H_ -#include - #include "roctracer.h" #include "hsakmt.h" +#ifdef __cplusplus #include "inc/kfd_ostream_ops.h" +#endif #include "inc/kfd_prof_str.h" #endif // INC_ROCTRACER_KFD_H_ diff --git a/inc/roctracer_roctx.h b/inc/roctracer_roctx.h index 329e974d61..accec45255 100644 --- a/inc/roctracer_roctx.h +++ b/inc/roctracer_roctx.h @@ -33,8 +33,6 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_ROCTX_H_ #define INC_ROCTRACER_ROCTX_H_ -#include "cb_table.h" - // ROC-TX API ID enumeration enum roctx_api_id_t { ROCTX_API_ID_roctxMarkA = 0, @@ -45,7 +43,7 @@ enum roctx_api_id_t { }; // ROCTX callbacks data type -struct roctx_api_data_t { +typedef struct roctx_api_data_s { union { const char* message; struct { @@ -58,14 +56,15 @@ struct roctx_api_data_t { const char* message; } roctxRangePop; } args; -}; +} roctx_api_data_t; +#ifdef __cplusplus +#include "cb_table.h" namespace roctx { - // ROCTX callbacks table type typedef roctracer::CbTable cb_table_t; - } // namespace roctx +#endif #ifdef __cplusplus extern "C" { diff --git a/script/kfdap.py b/script/kfdap.py index c46bf60d78..06248d9f26 100755 --- a/script/kfdap.py +++ b/script/kfdap.py @@ -284,7 +284,6 @@ class API_DescrParser: self.content_h += '#include \n' self.content_h += '#include \"roctracer_kfd.h\"\n' self.content_h += '#include \"hsakmt.h\"\n' - self.content_h += '#include \"cb_table.h\"\n' self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n' @@ -293,6 +292,7 @@ class API_DescrParser: self.content_h += '\n' self.content_h += '#if PROF_API_IMPL\n' + self.content_h += '#include \"cb_table.h\"\n' self.content_h += 'namespace roctracer {\n' self.content_h += 'namespace kfd_support {\n' @@ -372,7 +372,7 @@ class API_DescrParser: # generate API args structure def gen_arg_struct(self, n, name, call, struct): if n == -1: - self.content_h += 'struct kfd_api_data_t {\n' 
+ self.content_h += 'typedef struct kfd_api_data_s {\n' self.content_h += ' uint64_t correlation_id;\n' self.content_h += ' uint32_t phase;\n' if len(self.api_rettypes) != 0: @@ -394,7 +394,7 @@ class API_DescrParser: self.content_h += ' } ' + call + ';\n' else: self.content_h += ' } args;\n' - self.content_h += '};\n' + self.content_h += '} kfd_api_data_t;\n' # generate API callbacks def gen_callbacks(self, n, name, call, struct): @@ -476,6 +476,7 @@ class API_DescrParser: # generate stream operator def gen_out_stream(self, n, name, call, struct): if n == -1: + self.content_h += '#ifdef __cplusplus\n' self.content_h += 'typedef std::pair kfd_api_data_pair_t;\n' self.content_h += 'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n' self.content_h += ' const uint32_t cid = data_pair.first;\n' @@ -509,6 +510,7 @@ class API_DescrParser: self.content_h += ' }\n' self.content_h += ' return out;\n' self.content_h += '}\n' + self.content_h += '#endif\n' self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n' # generate PUBLIC_API for all API fcts diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 9199bad668..c006e6d88c 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -176,7 +176,7 @@ decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t; typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t; -typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t; +typedef decltype(roctracer_enable_op_activity_expl)* roctracer_enable_op_activity_t; typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t; struct cb_journal_data_t { @@ -774,7 +774,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_callback() } // Return default pool and set new one if 
parameter pool is not NULL. -PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) { +PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) { std::lock_guard lock(roctracer::memory_pool_mutex); roctracer_pool_t* p = reinterpret_cast(roctracer::memory_pool); if (pool != NULL) roctracer::memory_pool = reinterpret_cast(pool); @@ -782,7 +782,7 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) { } // Open memory pool -PUBLIC_API roctracer_status_t roctracer_open_pool( +PUBLIC_API roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, roctracer_pool_t** pool) { @@ -799,7 +799,7 @@ PUBLIC_API roctracer_status_t roctracer_open_pool( } // Close memory pool -PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) { +PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX std::lock_guard lock(roctracer::memory_pool_mutex); roctracer_pool_t* ptr = (pool == NULL) ? 
roctracer_default_pool() : pool; @@ -868,7 +868,7 @@ static void roctracer_enable_activity_impl( roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool); } -PUBLIC_API roctracer_status_t roctracer_enable_op_activity( +PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl( roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) @@ -878,7 +878,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_op_activity( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_domain_activity( +PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl( roctracer_domain_t domain, roctracer_pool_t* pool) { @@ -888,7 +888,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_activity( +PUBLIC_API roctracer_status_t roctracer_enable_activity_expl( roctracer_pool_t* pool) { API_METHOD_PREFIX @@ -970,7 +970,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_activity() } // Flush available activity records -PUBLIC_API roctracer_status_t roctracer_flush_activity(roctracer_pool_t* pool) { +PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 923384c444..ef0a8ea633 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -35,6 +35,8 @@ add_custom_target( mytest COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" + COMMAND C_TEST=1 HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose_ctest 
${PROJECT_BINARY_DIR}/test/MatrixTranspose_ctest" ) ## Util sources diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index 2a767a5626..c59c497af1 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -14,24 +14,34 @@ HIPCC=$(HIP_PATH)/bin/hipcc TARGET=hcc -SOURCES = MatrixTranspose.cpp -OBJECTS = $(SOURCES:.cpp=.o) -EXECUTABLE=./MatrixTranspose + +FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) +ifeq ($(C_TEST), 1) + COMP=gcc + SOURCES = MatrixTranspose.c + FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I/opt/rocm/hcc/include + EXECUTABLE=./MatrixTranspose_ctest +else + COMP=$(HIPCC) + FLAGS += -DHIP_TEST=1 + SOURCES = MatrixTranspose.cpp + EXECUTABLE=./MatrixTranspose +endif +OBJECTS = MatrixTranspose.o .PHONY: test - all: clean $(EXECUTABLE) -CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -CXX=$(HIPCC) +$(OBJECTS): $(SOURCES) + $(COMP) $(FLAGS) -c -o $@ $< $(EXECUTABLE): $(OBJECTS) $(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS) test: $(EXECUTABLE) - $(EXECUTABLE) + LD_PRELOAD=$(LIB_PATH)/libkfdwrapper64.so $(EXECUTABLE) clean: rm -f $(EXECUTABLE) diff --git a/test/MatrixTranspose_test/MatrixTranspose.c b/test/MatrixTranspose_test/MatrixTranspose.c new file mode 120000 index 0000000000..14d96acbc8 --- /dev/null +++ b/test/MatrixTranspose_test/MatrixTranspose.c @@ -0,0 +1 @@ +MatrixTranspose.cpp \ No newline at end of file diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 1a7b2cb97a..11ad71709a 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -20,32 +20,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include +#include -// roctracer extension API -#include - -// hip header file -#include +#ifdef __cplusplus +#include +using namespace std; +#else +#include +#endif // roctx header file #include +// roctracer extension API +#include -// kfd header file -#include +#if HIP_TEST +// hip header file +#include +// Macro to call HIP API +#define HIP_CALL(call) do { call; } while(0) +#else +#define HIP_CALL(call) do {} while(0) +#endif #ifndef ITERATIONS # define ITERATIONS 101 #endif #define WIDTH 1024 - - #define NUM (WIDTH * WIDTH) - #define THREADS_PER_BLOCK_X 4 #define THREADS_PER_BLOCK_Y 4 #define THREADS_PER_BLOCK_Z 1 +#if HIP_TEST // Device (Kernel) function, it must be void __global__ void matrixTranspose(float* out, float* in, const int width) { int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -53,6 +60,7 @@ __global__ void matrixTranspose(float* out, float* in, const int width) { out[y * width + x] = in[x * width + y]; } +#endif // CPU implementation of matrix transpose void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { @@ -76,10 +84,12 @@ int main() { float* gpuMatrix; float* gpuTransposeMatrix; +#if HIP_TEST hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + HIP_CALL(hipGetDeviceProperties(&devProp, 0)); - std::cout << "Device name " << devProp.name << std::endl; + printf("Device name %s\n", devProp.name); +#endif int i; int errors; @@ -99,8 +109,8 @@ int main() { } // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + HIP_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); + HIP_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); // correlation reagion32 roctracer_activity_push_external_correlation_id(31); @@ -108,7 +118,7 @@ int main() { roctracer_activity_push_external_correlation_id(32); // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * 
sizeof(float), hipMemcpyHostToDevice); + HIP_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); // correlation reagion33 roctracer_activity_push_external_correlation_id(33); @@ -117,9 +127,9 @@ int main() { roctxRangePush("hipLaunchKernel"); // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH); + HIP_CALL(hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, + gpuMatrix, WIDTH)); roctxMark("after hipLaunchKernel"); @@ -129,39 +139,40 @@ int main() { // Memory transfer from device to host roctxRangePush("hipMemcpy"); - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + HIP_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); roctxRangePop(); // for "hipMemcpy" roctxRangePop(); // for "hipLaunchKernel" // correlation reagion end - roctracer_activity_pop_external_correlation_id(); + roctracer_activity_pop_external_correlation_id(NULL); // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + HIP_CALL(matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH)); // verify the results errors = 0; double eps = 1.0E-6; for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + if (abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { errors++; } } - if (errors != 0) { + if ((HIP_TEST != 0) && (errors != 0)) { printf("FAILED: %d errors\n", errors); } else { + errors = 0; printf("PASSED!\n"); } // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + HIP_CALL(hipFree(gpuMatrix)); + HIP_CALL(hipFree(gpuTransposeMatrix)); // correlation reagion 
end - roctracer_activity_pop_external_correlation_id(); + roctracer_activity_pop_external_correlation_id(NULL); // correlation reagion end - roctracer_activity_pop_external_correlation_id(); + roctracer_activity_pop_external_correlation_id(NULL); // free the resources on host side free(Matrix); @@ -180,6 +191,7 @@ int main() { #if 1 #include #include +#include #include // Macro to check ROC-tracer calls status @@ -187,7 +199,7 @@ int main() { do { \ int err = call; \ if (err != 0) { \ - std::cerr << roctracer_error_string() << std::endl << std::flush; \ + fprintf(stderr, "%s\n", roctracer_error_string()); \ abort(); \ } \ } while (0) @@ -202,12 +214,12 @@ void api_callback( (void)arg; if (domain == ACTIVITY_DOMAIN_ROCTX) { - const roctx_api_data_t* data = reinterpret_cast(callback_data); + const roctx_api_data_t* data = (const roctx_api_data_t*)(callback_data); fprintf(stdout, "\n", data->args.message); return; } if (domain == ACTIVITY_DOMAIN_KFD_API) { - const kfd_api_data_t* data = reinterpret_cast(callback_data); + const kfd_api_data_t* data = (const kfd_api_data_t*)(callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> \n", roctracer_op_string(ACTIVITY_DOMAIN_KFD_API, cid, 0), cid, @@ -215,7 +227,7 @@ void api_callback( (data->phase == ACTIVITY_API_PHASE_ENTER) ? 
"on-enter" : "on-exit"); return; } - const hip_api_data_t* data = reinterpret_cast(callback_data); + const hip_api_data_t* data = (const hip_api_data_t*)(callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, @@ -263,8 +275,8 @@ void api_callback( // Activity tracing callback // hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) void activity_callback(const char* begin, const char* end, void* arg) { - const roctracer_record_t* record = reinterpret_cast(begin); - const roctracer_record_t* end_record = reinterpret_cast(end); + const roctracer_record_t* record = (const roctracer_record_t*)(begin); + const roctracer_record_t* end_record = (const roctracer_record_t*)(end); fprintf(stdout, "\tActivity records:\n"); fflush(stdout); while (record < end_record) { const char * name = roctracer_op_string(record->domain, record->op, record->kind); @@ -274,7 +286,7 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->begin_ns, record->end_ns ); - if (record->domain == ACTIVITY_DOMAIN_HIP_API or record->domain == ACTIVITY_DOMAIN_KFD_API) { + if ((record->domain == ACTIVITY_DOMAIN_HIP_API) || (record->domain == ACTIVITY_DOMAIN_KFD_API)) { fprintf(stdout, " process_id(%u) thread_id(%u)", record->process_id, record->thread_id @@ -301,11 +313,12 @@ void activity_callback(const char* begin, const char* end, void* arg) { // Init tracing routine void init_tracing() { - std::cout << "# INIT #############################" << std::endl << std::flush; + printf("# INIT #############################\n"); // roctracer properties roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL); // Allocating tracing pool - roctracer_properties_t properties{}; + roctracer_properties_t properties; + memset(&properties, 0, sizeof(roctracer_properties_t)); properties.buffer_size = 0x1000; properties.buffer_callback_fun = activity_callback; 
ROCTRACER_CALL(roctracer_open_pool(&properties)); @@ -323,7 +336,7 @@ void init_tracing() { // Start tracing routine void start_tracing() { - std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush; + printf("# START (%d) #############################\n", iterations); // Start if ((iterations & 1) == 1) roctracer_start(); else roctracer_stop(); @@ -336,7 +349,7 @@ void stop_tracing() { ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); ROCTRACER_CALL(roctracer_flush_activity()); - std::cout << "# STOP #############################" << std::endl << std::flush; + printf("# STOP #############################\n"); } #else void init_tracing() {} diff --git a/test/run.sh b/test/run.sh index ccf646aed6..d634357516 100755 --- a/test/run.sh +++ b/test/run.sh @@ -65,6 +65,7 @@ eval_test() { # Standalone test # rocTrecer is used explicitely by test +eval_test "standalone C test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_ctest" eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" # Tool test diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 549debe97a..78751a7463 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -663,7 +663,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { uint32_t cid = HSA_API_ID_NUMBER; const char* api = hsa_api_vec[i].c_str(); - ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid)); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL)); ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); printf(" %s", api); } @@ -753,7 +753,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, for (unsigned i = 0; i < 
kfd_api_vec.size(); ++i) { uint32_t cid = KFD_API_ID_NUMBER; const char* api = kfd_api_vec[i].c_str(); - ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid)); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid, NULL)); ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); printf(" %s", api); } From 8280f473271ed0c0e2de7f996a841ed5584bd49f Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sun, 29 Dec 2019 04:09:10 -0600 Subject: [PATCH 42/94] adding multi-GPU test --- test/CMakeLists.txt | 4 +- test/MatrixTranspose_test/Makefile | 14 +- test/MatrixTranspose_test/MatrixTranspose.cpp | 26 +- test/MatrixTranspose_test_mgpu/Makefile | 40 --- .../MatrixTranspose.cpp | 313 ------------------ test/run.sh | 1 + 6 files changed, 30 insertions(+), 368 deletions(-) delete mode 100644 test/MatrixTranspose_test_mgpu/Makefile delete mode 100644 test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ef0a8ea633..05593efedd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -35,8 +35,10 @@ add_custom_target( mytest COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" + COMMAND MGPU_TEST=1 HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_mgpu" COMMAND C_TEST=1 HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" - COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose_ctest ${PROJECT_BINARY_DIR}/test/MatrixTranspose_ctest" + COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_ctest" ) ## Util sources diff --git 
a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index c59c497af1..dc5c42aa65 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -11,24 +11,24 @@ ifeq (,$(HIP_PATH)) endif HIPCC=$(HIP_PATH)/bin/hipcc - TARGET=hcc - - +EXECUTABLE=./MatrixTranspose +OBJECTS = MatrixTranspose.o FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) + ifeq ($(C_TEST), 1) COMP=gcc SOURCES = MatrixTranspose.c FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I/opt/rocm/hcc/include - EXECUTABLE=./MatrixTranspose_ctest else COMP=$(HIPCC) - FLAGS += -DHIP_TEST=1 SOURCES = MatrixTranspose.cpp - EXECUTABLE=./MatrixTranspose + FLAGS += -DHIP_TEST=1 +endif +ifeq ($(MGPU_TEST), 1) + FLAGS += -DMGPU_TEST=1 endif -OBJECTS = MatrixTranspose.o .PHONY: test diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 11ad71709a..c1189e9387 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -84,21 +84,33 @@ int main() { float* gpuMatrix; float* gpuTransposeMatrix; -#if HIP_TEST - hipDeviceProp_t devProp; - HIP_CALL(hipGetDeviceProperties(&devProp, 0)); - - printf("Device name %s\n", devProp.name); -#endif - int i; int errors; init_tracing(); +#if HIP_TEST + int gpuCount = 1; +#if MGPU_TEST + hipGetDeviceCount(&gpuCount); + printf("Number of GPUs: %d\n", gpuCount); +#endif + iterations *= gpuCount; +#endif + while (iterations-- > 0) { start_tracing(); +#if HIP_TEST + // set GPU + const int devIndex = iterations % gpuCount; + hipSetDevice(devIndex); + + hipDeviceProp_t devProp; + HIP_CALL(hipGetDeviceProperties(&devProp, 0)); + printf("Device %d name: %s\n", devIndex, devProp.name); +#endif + Matrix = (float*)malloc(NUM * sizeof(float)); TransposeMatrix = (float*)malloc(NUM * sizeof(float)); cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); diff 
--git a/test/MatrixTranspose_test_mgpu/Makefile b/test/MatrixTranspose_test_mgpu/Makefile deleted file mode 100644 index da9971b371..0000000000 --- a/test/MatrixTranspose_test_mgpu/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -ROOT_PATH = ../.. -LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -export LD_LIBRARY_PATH=$(LIB_PATH) -HIP_VDI ?= 0 -ITERATIONS ?= 1 - -HIP_PATH?= $(wildcard /opt/rocm/hip) -ifeq (,$(HIP_PATH)) - HIP_PATH=../../.. -endif - -HIPCC=$(HIP_PATH)/bin/hipcc - -TARGET=hcc - -SOURCES = MatrixTranspose.cpp -OBJECTS = $(SOURCES:.cpp=.o) - -EXECUTABLE=./MatrixTranspose - -.PHONY: test - - -all: clean $(EXECUTABLE) - -CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -CXX=$(HIPCC) - -$(EXECUTABLE): $(OBJECTS) - $(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS) - -test: $(EXECUTABLE) - $(EXECUTABLE) - -clean: - rm -f $(EXECUTABLE) - rm -f $(OBJECTS) - rm -f $(HIP_PATH)/src/*.o - diff --git a/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp b/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp deleted file mode 100644 index ffd4c88109..0000000000 --- a/test/MatrixTranspose_test_mgpu/MatrixTranspose.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -// roctracer extension API -#include - -// hip header file -#include - -#ifndef ITERATIONS -# define ITERATIONS 1 -#endif -#define WIDTH 1024 - - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; - - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -int iterations = ITERATIONS; -void init_tracing(); -void start_tracing(); -void stop_tracing(); - -int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - int i; - int errors; - - int gpuCount = 0; - hipGetDeviceCount(&gpuCount); - std::cout << "Number of GPUs: " << gpuCount << std::endl; - - init_tracing(); - - while (iterations-- > 0) { - start_tracing(); - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; 
i++) { - Matrix[i] = (float)i * 10.0f; - } - - for (i = 0; i < gpuCount; ++i) { - // switch GPU. - hipSetDevice(i); - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - std::cout << "Device name " << devProp.name << std::endl; - - // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - - // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH); - - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - - hipStreamSynchronize(0); - - // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); - } - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - stop_tracing(); - } - - return errors; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// HIP Callbacks/Activity tracing -// -#if 1 -#include -#include - -// Macro to check ROC-tracer calls status -#define ROCTRACER_CALL(call) \ - do { \ - int err = call; \ - if (err != 0) { \ - std::cerr << roctracer_error_string() << std::endl << std::flush; \ - abort(); \ - } \ - } while (0) - -// Runtime API callback function -void 
api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ - std::cout << "### api_callback IN\n"; - (void)arg; - - //if (domain == ACTIVITY_DOMAIN_ROCTX) { - // const roctx_api_data_t* data = reinterpret_cast(callback_data); - // fprintf(stdout, "ROCTX: \"%s\"\n", data->args.message); - // return; - //} - - if (domain == ACTIVITY_DOMAIN_HCC_OPS) { - fprintf(stdout, "HCC OPS\n"); - return; - } - - if (domain == ACTIVITY_DOMAIN_HSA_API) { - fprintf(stdout, "HSA API\n"); - return; - } - - const hip_api_data_t* data = reinterpret_cast(callback_data); - fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", - roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), - cid, - data->correlation_id, - (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { - switch (cid) { - case HIP_API_ID_hipMemcpy: - fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)", - data->args.hipMemcpy.dst, - data->args.hipMemcpy.src, - (uint32_t)(data->args.hipMemcpy.sizeBytes), - (uint32_t)(data->args.hipMemcpy.kind)); - break; - case HIP_API_ID_hipMalloc: - fprintf(stdout, "ptr(%p) size(0x%x)", - data->args.hipMalloc.ptr, - (uint32_t)(data->args.hipMalloc.size)); - break; - case HIP_API_ID_hipFree: - fprintf(stdout, "ptr(%p)", - data->args.hipFree.ptr); - break; - case HIP_API_ID_hipModuleLaunchKernel: - fprintf(stdout, "kernel(\"%s\") stream(%p)", - hipKernelNameRef(data->args.hipModuleLaunchKernel.f), - data->args.hipModuleLaunchKernel.stream); - break; - default: - break; - } - } else { - switch (cid) { - case HIP_API_ID_hipMalloc: - fprintf(stdout, "*ptr(0x%p)", - *(data->args.hipMalloc.ptr)); - break; - default: - break; - } - } - fprintf(stdout, "\n"); fflush(stdout); -} - -// Activity tracing callback -// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) -void activity_callback(const char* begin, const char* end, void* arg) { - std::cout << "### 
activity_callback IN\n"; - const roctracer_record_t* record = reinterpret_cast(begin); - const roctracer_record_t* end_record = reinterpret_cast(end); - fprintf(stdout, "\tActivity records:\n"); fflush(stdout); - while (record < end_record) { - const char * name = roctracer_op_string(record->domain, record->op, record->kind); - fprintf(stdout, "\tdomain(%u)", record->domain); - fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", - name, - record->correlation_id, - record->begin_ns, - record->end_ns - ); - if (record->domain == ACTIVITY_DOMAIN_HIP_API) { - fprintf(stdout, " process_id(%u) thread_id(%u)", - record->process_id, - record->thread_id - ); - } else if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - fprintf(stdout, " device_id(%d) queue_id(%lu)", - record->device_id, - record->queue_id - ); - if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); - } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { - fprintf(stdout, " external_id(%lu)", - record->external_id - ); - } else { - fprintf(stderr, "Bad domain %d\n", record->domain); - //abort(); - } - fprintf(stdout, "\n"); - fflush(stdout); - ROCTRACER_CALL(roctracer_next_record(record, &record)); - } -} - -// Init tracing routine -void init_tracing() { - std::cout << "# INIT #############################" << std::endl << std::flush; - // Allocating tracing pool - roctracer_properties_t properties{}; - properties.buffer_size = 0x1000; - properties.buffer_callback_fun = activity_callback; - properties.buffer_callback_arg = &properties; - ROCTRACER_CALL(roctracer_open_pool(&properties)); - // Enable API callbacks - ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL)); - // Enable activity tracing - ROCTRACER_CALL(roctracer_enable_activity()); -} - -// Start tracing routine -void start_tracing() { - std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush; -} - -// Stop tracing routine -void stop_tracing() { - 
ROCTRACER_CALL(roctracer_disable_callback()); - - ROCTRACER_CALL(roctracer_disable_activity()); - ROCTRACER_CALL(roctracer_flush_activity()); - std::cout << "# STOP #############################" << std::endl << std::flush; -} -#else -void init_tracing() {} -void start_tracing() {} -void stop_tracing() {} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/test/run.sh b/test/run.sh index d634357516..7b7d5109b3 100755 --- a/test/run.sh +++ b/test/run.sh @@ -67,6 +67,7 @@ eval_test() { # rocTrecer is used explicitely by test eval_test "standalone C test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_ctest" eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" +eval_test "standalone HIP MGPU test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_mgpu" # Tool test # rocTracer/tool is loaded by HSA runtime From b82a21056edee2eb69b2bcb2845d9989c5f7479c Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 30 Dec 2019 20:09:41 -0600 Subject: [PATCH 43/94] normalizing C API: HSA domain --- cmake_modules/env.cmake | 1 + inc/roctracer_hsa.h | 12 ++++++++---- script/hsaap.py | 2 ++ test/CMakeLists.txt | 7 ++++--- test/MatrixTranspose_test/Makefile | 4 ++-- test/MatrixTranspose_test/MatrixTranspose.cpp | 1 + 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cmake_modules/env.cmake b/cmake_modules/env.cmake index da09ad4f5c..5d3cde0064 100644 --- a/cmake_modules/env.cmake +++ b/cmake_modules/env.cmake @@ -118,6 +118,7 @@ endif () find_library ( HSA_KMT_LIB "libhsakmt.so" ) get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY ) set ( HSA_KMT_INC_PATH "${HSA_KMT_LIB_PATH}/../include" ) +set ( ROCM_INC_PATH ${HSA_KMT_INC_PATH} ) ## Basic Tool Chain Information message ( "----------------NBIT: ${NBIT}" ) diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index c01253e79f..8531ab51dc 100644 --- 
a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -22,16 +22,16 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_HSA_H_ #define INC_ROCTRACER_HSA_H_ -#include -#include #include -#include #include -#include "cb_table.h" #include "roctracer.h" +#ifdef __cplusplus +#include +#include + namespace roctracer { namespace hsa_support { enum { @@ -129,6 +129,10 @@ struct output_streamer { inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << ""; return out; } }; };}; +#else // !__cplusplus +typedef void* hsa_amd_queue_intercept_handler; +typedef void* hsa_amd_runtime_queue_notifier; +#endif //! __cplusplus #include "inc/hsa_prof_str.h" #endif // INC_ROCTRACER_HSA_H_ diff --git a/script/hsaap.py b/script/hsaap.py index e9a7b0463f..1e413a5295 100755 --- a/script/hsaap.py +++ b/script/hsaap.py @@ -440,6 +440,7 @@ class API_DescrParser: # generate stream operator def gen_out_stream(self, n, name, call, struct): if n == -1: + self.content += '#ifdef __cplusplus\n' self.content += 'typedef std::pair hsa_api_data_pair_t;\n' self.content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n' self.content += ' const uint32_t cid = data_pair.first;\n' @@ -483,6 +484,7 @@ class API_DescrParser: self.content += ' }\n' self.content += ' return out;\n' self.content += '}\n' + self.content += '#endif\n' ############################################################# # main diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 05593efedd..38750ba5a9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -30,14 +30,15 @@ set ( HSA_TEST_DIR "${TEST_DIR}/hsa/test" ) set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) ## build HIP tests +set ( TEST_CFLAGS HIP_VDI=${HIP_VDI} ROCM_INC_PATH=${ROCM_INC_PATH} ) add_custom_target( mytest COMMAND make -C "${TEST_DIR}/MatrixTranspose" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" - COMMAND HIP_VDI=${HIP_VDI} make -C 
"${TEST_DIR}/MatrixTranspose_test" + COMMAND ${TEST_CFLAGS} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" - COMMAND MGPU_TEST=1 HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND MGPU_TEST=1 ${TEST_CFLAGS} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_mgpu" - COMMAND C_TEST=1 HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND C_TEST=1 ${TEST_CFLAGS} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_ctest" ) diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index dc5c42aa65..04780535ee 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -1,7 +1,7 @@ ROOT_PATH = ../.. 
LIB_PATH = $(ROOT_PATH)/build ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so -HSA_KMT_INC_PATH ?= /opt/rocm/include +ROCM_INC_PATH ?= /opt/rocm/include HIP_VDI ?= 0 ITERATIONS ?= 100 @@ -15,7 +15,7 @@ TARGET=hcc EXECUTABLE=./MatrixTranspose OBJECTS = MatrixTranspose.o -FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) +FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${ROCM_INC_PATH}/hsa -I${ROCM_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -DAMD_INTERNAL_BUILD=1 ifeq ($(C_TEST), 1) COMP=gcc diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index c1189e9387..cc261f4312 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -203,6 +203,7 @@ int main() { #if 1 #include #include +#include #include #include From b2d124d2bebc6337f0d0b4af7f4a325ac167d97e Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 31 Dec 2019 07:05:32 -0600 Subject: [PATCH 44/94] adding API specification --- doc/roctracer_spec.md | 683 ++++++++++++++++++++++++++++++++++++++++ inc/ext/prof_protocol.h | 5 +- inc/roctracer.h | 8 +- 3 files changed, 690 insertions(+), 6 deletions(-) create mode 100644 doc/roctracer_spec.md diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md new file mode 100644 index 0000000000..96847bc1e0 --- /dev/null +++ b/doc/roctracer_spec.md @@ -0,0 +1,683 @@ +# ROC Profiler Library Specification +``` +The rocTracer API is agnostic to specific runtime and may trace +the runtime API calls and asynchronous GPU activity. +``` +## 1. High level overview +``` +The goal of the implementation is to provide a runtime independent API +for tracing of runtime calls and asynchronous activity, like GPU kernel +dispatches and memory moves. 
The tracing includes callback API for +runtime API tracing and activity API for asynchronous activity records +logging. +Depending on particular runtime intercepting mechanism, the rocTracer +library can be dynamically linked, dynamically loaded by the runtime as +a plugin or some API wrapper can be loaded using LD_PRELOAD. +The library has a C API. + +The rocTracer library is an API that intercepts runtime API calls and +traces asynchronous activity. The activity tracing results are recorded +in a ring buffer. +``` +## 2. General API +### 2.1. Description +``` +The library supports method for getting the error number and error string +of the last failed library API call. It allows to check the conformance +of used library API header and the library binary, the version macros and +API methods can be used. +Returning the error and error string methods: +• roctracer_status_t – error code enumeration +• roctracer_error_string – method for returning the error string +Library version: +• ROCTRACER_VERSION_MAJOR – API major version macro +• ROCTRACER_VERSION_MINOR – API minor version macro +• roctracer_version_major – library major version +• roctracer_version_minor – library minor version +``` +### 2.2. Error codes and error string methods +``` +Error code enumeration +typedef enum { + ROCTRACER_STATUS_SUCCESS = 0, + ROCTRACER_STATUS_ERROR = 1, + ROCTRACER_STATUS_UNINIT = 2, + ROCTRACER_STATUS_BREAK = 3, + ROCTRACER_STATUS_BAD_DOMAIN = 4, + ROCTRACER_STATUS_BAD_PARAMETER = 5, + ROCTRACER_STATUS_HIP_API_ERR = 6, + ROCTRACER_STATUS_HCC_OPS_ERR = 7, + ROCTRACER_STATUS_ROCTX_ERR = 8, +} roctracer_status_t; + +Return error string: +const char* roctracer_error_string(); +``` +### 2.3. Library version +``` +The library provides major and minor versions. Major version is for +incompatible API changes and minor version for bug fixes. 
+API version macros defined in the library API header ‘roctracer.h’: +ROCTRACER_VERSION_MAJOR +ROCTRACER_VERSION_MINOR + +Methods to check library major and minor version: +uint32_t roctracer_version_major(); +uint32_t roctracer_version_minor(); +``` +## 3. Frontend API +### 3.1. Description +``` +The rocTracer provides support for runtime API callbacks and activity +records logging. The APIs of different runtimes at different levels +are considered as different API domains with assigned domain IDs. For +example, language level and driver level. The API callbacks provide +the API calls arguments and are called on two phases on “enter” and +on “exit”. The activity records are logged to the ring buffer and can +be associated with the respective API calls using the correlation ID. +Activity API can be used to enable collecting of the records with +timestamping data for API calls and asynchronous activity like the +kernel submits, memory copies and barriers. +Tracing domains: +• roctracer_domain_t – runtime API domains, HIP, HSA, etc… +• roctracer_op_string – Return Op string by given domain and + activity Op code +• roctracer_op_code – Return Op code and kind by given string + +Callback API: +• roctracer_rtapi_callback_t – runtime API callback type +• roctracer_enable_op_callback – enable runtime API callback + by domain and Op code +• roctracer_enable_domain_callback – enable runtime API callback + by domain for all Ops +• roctracer_enable_callback – enable runtime API callback for + all domains, all Ops +• roctracer_disable_op_callback – disable runtime API callback + by domain and Op code +• roctracer_disable_domain_callback – disable runtime API callback + by domain for all Ops +• roctracer_disable_callback – disable runtime API callback for + all domains, all Ops + +Activity API: +• roctracer_record_t – activity record +• roctracer_pool_t – records pool type +• roctracer_allocator_t – tracer allocator type +• roctracer_buffer_callback_t – pool callback type +•
roctracer_open_pool[_expl] – create records pool +• roctracer_close_pool[_expl] – close records pool +• roctracer_default_pool[_expl] – get/set default pool +• roctracer_properties_t – tracer properties +• roctracer_enable_op_activity[_expl] – enable activity records + logging +• roctracer_enable_domain_activity[_expl] – enable activity records + logging +• roctracer_enable_activity[_expl] – enable activity records logging +• roctracer_disable_op_activity – disable activity records logging +• roctracer_disable_domain_activity – disable activity records + logging +• roctracer_disable_activity – disable activity records logging +• roctracer_flush_activity[_expl] – flush available activity records +• roctracer_next_record – return next record +• roctracer_get_timestamp – return correlated GPU/CPU system + timestamp +``` +### 3.2. Tracing Domains +``` +Various tracing domains are supported. Each domain is assigned +a domain ID. The domains include HSA, HIP, and HCC runtime levels. +Traced API domains: +typedef enum { + ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain + ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain + ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain + ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain + ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain + ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain + ACTIVITY_DOMAIN_NUMBER = 7 +} activity_domain_t; + +Return name by given domain and Op code: +const char* roctracer_op_string( // NULL returned on error and error number + // is set + uint32_t domain, // tracing domain + uint32_t op, // activity op code + uint32_t kind); // activity kind +Return Op code and kind by given string: +roctracer_status_t roctracer_op_code( + uint32_t domain, // tracing domain + const char* str, // [in] op string + uint32_t* op, // [out] op code + uint32_t* kind); // [out] op kind code if not NULL +``` +### 3.3. 
Callback API +``` +The tracer provides support for runtime API callbacks and activity records +logging. The API callbacks provide the API calls arguments and are called +on two phases on “enter”, on “exit”. + +API phase passed to the callbacks: +typedef enum { + ROCTRACER_API_PHASE_ENTER, + ROCTRACER_API_PHASE_EXIT, +} roctracer_api_phase_t; + +Runtime API callback type: +typedef void (*roctracer_rtapi_callback_t)( + uint32_t domain, // runtime API domain + uint32_t cid, // API call ID + const void* data, // [in] callback data with correlation id and the call + // arguments + void* arg); // [in/out] user passed data + +Enable runtime API callbacks: +roctracer_status_t roctracer_enable_op_callback( + activity_domain_t domain, // tracing domain + uint32_t op, // API call ID + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + +roctracer_status_t roctracer_enable_domain_callback( + activity_domain_t domain, // tracing domain + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + + +roctracer_status_t roctracer_enable_callback( + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + +Disable runtime API callbacks: +roctracer_status_t roctracer_disable_op_callback( + activity_domain_t domain, // tracing domain + uint32_t op); // API call ID + +roctracer_status_t roctracer_disable_domain_callback( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_disable_callback(); +``` +### 3.4 Activity API +``` +The activity records are asynchronously logged to the pool and can be +associated with the respective API callbacks using the correlation ID. +Activity API can be used to enable collecting the records with +timestamp data for API calls and GPU activity like kernel submits, +memory copies, and barriers. 
+ +// Correlation id +typedef uint64_t activity_correlation_id_t; + +Activity record type: + +// Activity record type +struct activity_record_t { + uint32_t domain; // activity domain id + activity_kind_t kind; // activity kind + activity_op_t op; // activity op + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp + union { + struct { + int device_id; // device id + uint64_t queue_id; // queue id + }; + struct { + uint32_t process_id; // process id + uint32_t thread_id; // thread id + }; + struct { + activity_correlation_id_t external_id; // external correlation id + }; + }; + size_t bytes; // data size bytes +}; + +Return next record: +static inline roctracer_status_t roctracer_next_record( + const activity_record_t* record, // [in] record ptr + const activity_record_t** next); // [out] next record ptr + +Tracer allocator type: +typedef void (*roctracer_allocator_t)( + char** ptr, // memory pointer + size_t size, // memory size + void* arg); // allocator arg + +Pool callback type: +typedef void (*roctracer_buffer_callback_t)( + const char* begin, // [in] available buffered trace records + const char* end, // [in] end of buffered trace records + void* arg); // [in/out] callback arg + +Tracer properties: +typedef struct { + uint32_t mode; // roctracer mode + size_t buffer_size; // buffer size + // power of 2 + roctracer_allocator_t alloc_fun; // memory allocator + // function pointer + void* alloc_arg; // memory allocator + // function arg + roctracer_buffer_callback_t buffer_callback_fun; // tracer record + // callback function + void* buffer_callback_arg; // tracer record + // callback arg +} roctracer_properties_t; + +Tracer memory pool handle type: +typedef void roctracer_pool_t; + +Create tracer memory pool: +roctracer_status_t roctracer_open_pool( + const roctracer_properties_t* properties); // tracer pool properties + +roctracer_status_t roctracer_open_pool_expl( + const
roctracer_properties_t* properties, // tracer pool properties + roctracer_pool_t** pool); // [out] returns tracer pool if + // not NULL, otherwise sets the + // default one if it is not set + // yet; otherwise the error is + // generated + +Close tracer memory pool: +roctracer_status_t roctracer_close_pool(); + +roctracer_status_t roctracer_close_pool_expl( + roctracer_pool_t* pool); // memory pool, NULL means default pool + +Return current default pool. Set new default pool if the argument is not NULL: +roctracer_pool_t* roctracer_default_pool(); + +roctracer_pool_t* roctracer_default_pool_expl( + roctracer_pool_t* pool); // new default pool if not NULL +``` +Enable activity records logging: +``` +roctracer_status_t roctracer_enable_op_activity( + activity_domain_t domain, // tracing domain + uint32_t op); // activity op ID + +roctracer_status_t roctracer_enable_op_activity_expl( + activity_domain_t domain, // tracing domain + uint32_t op, // activity op ID + roctracer_pool_t* pool); // memory pool, NULL means default pool + +roctracer_status_t roctracer_enable_domain_activity( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_enable_domain_activity_expl( + activity_domain_t domain, // tracing domain + roctracer_pool_t* pool); // memory pool, NULL means default pool + +roctracer_status_t roctracer_enable_activity(); + +roctracer_status_t roctracer_enable_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL means default pool + +Disable activity records logging: +roctracer_status_t roctracer_disable_op_activity( + activity_domain_t domain, // tracing domain + uint32_t op); // activity op ID + +roctracer_status_t roctracer_disable_domain_activity( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_disable_activity(); + +Flush available activity records: +roctracer_status_t roctracer_flush_activity(); + +roctracer_status_t roctracer_flush_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL 
means default pool + +Return correlated GPU/CPU system timestamp: +roctracer_status_t roctracer_get_timestamp( + uint64_t* timestamp); // [out] return timestamp +``` +## 4. rocTracer Usage Code Examples +### 4.1. HIP API and HCC ops, GPU Activity Tracing +``` +#include +#include + +// HIP API callback function +void hip_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const hip_api_data_t* data = reinterpret_cast + (callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + +} + +// Activity tracing callback +void activity_callback(const char* begin, const char* end, void* arg) { + const roctracer_record_t* record = reinterpret_cast(begin); + const roctracer_record_t* end_record = reinterpret_cast(end); + fprintf(stdout, "\tActivity records:\n"); + while (record < end_record) { + const char * name = roctracer_op_string(record->domain, + record->activity_id, 0); + fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) + device_id(%d) stream_id(%lu)\n", + name, + record->correlation_id, + record->begin_ns, + record->end_ns, + record->device_id, + record->stream_id + ); + + ROCTRACER_CALL(roctracer_next_record(record, &record)); + } +} + +int main() { + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 12; + properties.buffer_callback_fun = activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + + // Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP + // API calls. 
+ ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, + HIP_API_ID_hipModuleLaunchKernel, + hip_api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_API, + HIP_API_ID_hipModuleLaunchKernel)); + // Enable HIP kernel dispatch activity tracing + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HCC_OPS, + hc::HSA_OP_ID_DISPATCH)); + + + + // Disable tracing and close the pool + ROCTRACER_CALL(roctracer_disable_callback()); + ROCTRACER_CALL(roctracer_disable_activity()); + ROCTRACER_CALL(roctracer_close_pool()); +} +``` +### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled +``` +This shows a MatrixTranspose HIP sample with enabled tracing of +all HIP API and all GPU asynchronous activity. +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#include + +// hip header file +#include + +#ifndef ITERATIONS +# define ITERATIONS 100 +#endif +#define WIDTH 1024 + + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +// Device (Kernel) function, it must be void +// hipLaunchParm provides the execution configuration +__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in, + const int width) { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + out[y * width + x] = in[x * width + y]; +} + +// CPU implementation of matrix transpose +void matrixTransposeCPUReference(float* output, float* input, const unsigned + int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + +int iterations = ITERATIONS; +void start_tracing(); +void stop_tracing(); + +int main() { + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; + + float* gpuMatrix; + float* gpuTransposeMatrix; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + + std::cout << "Device name " << devProp.name << std::endl; + + int i; + int errors; + + while (iterations-- > 0) { + start_tracing(); + + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // Memory transfer from host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), + hipMemcpyHostToDevice); + + // Lauching kernel from host + hipLaunchKernel(matrixTranspose, + dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / + 
THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, + gpuTransposeMatrix, gpuMatrix, WIDTH); + + // Memory transfer from device to host + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), + hipMemcpyDeviceToHost); + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + errors = 0; + double eps = 1.0E-6; + for (i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + + stop_tracing(); + } + + return errors; +} + +///////////////////////////////////////////////////////////////////////////// +// HIP/HCC Callbacks/Activity tracing +///////////////////////////////////////////////////////////////////////////// +#include +#include + +// Macro to check ROC-tracer calls status +#define ROCTRACER_CALL(call) \ + do { \ + int err = call; \ + if (err != 0) { \ + std::cerr << roctracer_error_string() << std::endl << std::flush; \ + abort(); \ + } \ + } while (0) + +// HIP API callback function +void hip_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const hip_api_data_t* data = reinterpret_cast + (callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? 
"on-enter" : "on-exit"); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + switch (cid) { + case HIP_API_ID_hipMemcpy: + fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)", + data->args.hipMemcpy.dst, + data->args.hipMemcpy.src, + (uint32_t)(data->args.hipMemcpy.sizeBytes), + (uint32_t)(data->args.hipMemcpy.kind)); + break; + case HIP_API_ID_hipMalloc: + fprintf(stdout, "ptr(%p) size(0x%x)", + data->args.hipMalloc.ptr, + (uint32_t)(data->args.hipMalloc.size)); + break; + case HIP_API_ID_hipFree: + fprintf(stdout, "ptr(%p), + data->args.hipFree.ptr); + break; + case HIP_API_ID_hipModuleLaunchKernel: + fprintf(stdout, "kernel(\"%s\") stream(%p)", + hipKernelNameRef(data->args.hipModuleLaunchKernel.f), + data->args.hipModuleLaunchKernel.stream); + break; + default: + break; + } + } else { + switch (cid) { + case HIP_API_ID_hipMalloc: + fprintf(stdout, "*ptr(0x%p)", + *(data->args.hipMalloc.ptr)); + break; + default: + break; + } + } + fprintf(stdout, "\n"); fflush(stdout); +} + +// Activity tracing callback +// hipMalloc id(3) correlation_id(1): +// begin_ns(1525888652762640464) end_ns(1525888652762877067) +void activity_callback(const char* begin, const char* end, void* arg) { + const roctracer_record_t* record = reinterpret_cast + (begin); + const roctracer_record_t* end_record = reinterpret_cast + (end); + fprintf(stdout, "\tActivity records:\n"); fflush(stdout); + while (record < end_record) { + const char * name = roctracer_op_string(record->domain, + record->activity_id, 0); + fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \ + device_id(%d) stream_id(%lu)", + name, + record->correlation_id, + record->begin_ns, + record->end_ns, + record->device_id, + record->stream_id + ); + if (record->kind == hc::HSA_OP_ID_COPY) + fprintf(stdout, " bytes(0x%zx)", record->bytes); + fprintf(stdout, "\n"); + fflush(stdout); + ROCTRACER_CALL(roctracer_next_record(record, &record)); + } +} + +// Start tracing routine +void start_tracing() { + std::cout << "# START 
#############################" << std::endl + << std::flush; + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 0x1000; + properties.buffer_callback_fun = activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + // Enable API callbacks, all domains + ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL)); + // Enable activity tracing, all domains + ROCTRACER_CALL(roctracer_enable_activity()); +} + +// Stop tracing routine +void stop_tracing() { + ROCTRACER_CALL(roctracer_disable_api_callback()); + ROCTRACER_CALL(roctracer_disable_api_activity()); + ROCTRACER_CALL(roctracer_close_pool()); + std::cout << "# STOP #############################" << std::endl + << std::flush; +} +///////////////////////////////////////////////////////////////////////////// +``` diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index ee52e91082..c578df0fd4 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -29,9 +29,10 @@ THE SOFTWARE. 
typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain + ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain diff --git a/inc/roctracer.h b/inc/roctracer.h index 0fc6df0063..f243267284 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -245,6 +245,10 @@ roctracer_status_t roctracer_flush_activity() return roctracer_flush_activity_expl(NULL); } +// Get system timestamp +roctracer_status_t roctracer_get_timestamp( + uint64_t* timestamp); // [out] return timestamp + // Load/Unload methods // Set properties roctracer_status_t roctracer_set_properties( @@ -260,10 +264,6 @@ bool roctracer_load( void roctracer_unload(bool destruct); -// Get system timestamp. -roctracer_status_t roctracer_get_timestamp( - uint64_t* timestamp); - #ifdef __cplusplus } // extern "C" block #endif // __cplusplus From 272cdcb13f55de07ce4883e7c44d37c5ca32569f Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 31 Dec 2019 07:09:22 -0600 Subject: [PATCH 45/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 96847bc1e0..21358a2fa3 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -57,6 +57,7 @@ const char* roctracer_error_string(); ``` The library provides major and minor versions. Major version is for incompatible API changes and minor version for bug fixes. 
+ API version macros defined in the library API header ‘roctracer.h’: ROCTRACER_VERSION_MAJOR ROCTRACER_VERSION_MINOR @@ -78,6 +79,7 @@ be associated with the respective API calls using the correlation ID. Activity API can be used to enable collecting of the records with timestamping data for API calls and asynchronous activity like the kernel submits, memory copies and barriers + Tracing domains: • roctracer_domain_t – runtime API domains, HIP, HSA, etc… • roctracer_op_string – Return Op string by given domain and @@ -126,6 +128,7 @@ Activity API: ``` Various tracing domains are supported. Each domain is assigned with a domain ID. The domains include HSA, HIP, and HCC runtime levels. + Traced API domains: typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain @@ -415,6 +418,7 @@ int main() { ``` This shows a MatrixTranspose HIP sample with enabled tracing of all HIP API and all GPU asynchronous activity. + /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. From e14a217e43fd38a3c87e28f3e86af0339132b505 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 31 Dec 2019 07:10:18 -0600 Subject: [PATCH 46/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 21358a2fa3..248de6604c 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -10,6 +10,7 @@ for tracing of runtime calls and asynchronous activity, like GPU kernel dispatches and memory moves. The tracing includes callback API for runtime API tracing and activity API for asynchronous activity records logging. + Depending on particular runtime intercepting mechanism, the rocTracer library can be dynamically linked, dynamically loaded by the runtime as a plugin or some API wrapper can be loaded using LD_PRELOAD. 
@@ -26,9 +27,11 @@ The library supports method for getting the error number and error string of the last failed library API call. It allows to check the conformance of used library API header and the library binary, the version macros and API methods can be used. + Returning the error and error string methods: • roctracer_status_t – error code enumeration • roctracer_error_string – method for returning the error string + Library version: • ROCTRACER_VERSION_MAJOR – API major version macro • ROCTRACER_VERSION_MINOR – API minor version macro @@ -37,7 +40,7 @@ Library version: ``` ### 2.2. Error codes and error string methods ``` -Error code enumeration +Error code enumeration: typedef enum { ROCTRACER_STATUS_SUCCESS = 0, ROCTRACER_STATUS_ERROR = 1, From 3fd10c8d368a9e7cba04c02172bcfad58207f19e Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 31 Dec 2019 07:14:00 -0600 Subject: [PATCH 47/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 248de6604c..15b4199810 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -113,19 +113,15 @@ Activity API: • roctracer_close_pool[_expl] – close records pool • roctracer_default_pool[_expl] – get/set default pool • roctracer_properties_t – tracer properties -• roctracer_enable_op_activity[_expl] – enable activity records - logging -• roctracer_enable_domain_activity[_expl] – enable activity records - logging +• roctracer_enable_op_activity[_expl] – enable activity records logging +• roctracer_enable_domain_activity[_expl] – enable activity records logging • roctracer_enable_activity[_expl] – enable activity records logging • roctracer_disable_op_activity – disable activity records logging -• roctracer_disable_domain_activity – disable activity records - logging +• roctracer_disable_domain_activity – disable activity records 
logging • roctracer_disable_activity – disable activity records logging • roctracer_flush_activity[_expl] – disable activity records logging • roctracer_next_record – return next record -• roctracer_get_timestamp – return correlated GPU/CPU system - timestamp +• roctracer_get_timestamp – return correlated GPU/CPU system timestamp ``` ### 3.2. Tracing Domains ``` @@ -145,10 +141,9 @@ typedef enum { } activity_domain_t; Return name by given domain and Op code: -const char* roctracer_op_string( // NULL returned on error and error number - // is set - uint32_t domain, // tracing domain - uint32_t op, // activity op code +const char* roctracer_op_string( // NULL returned on error and error number is set + uint32_t domain, // tracing domain + uint32_t op, // activity op code uint32_t kind); // activity kind Return Op code and kind by given string: roctracer_status_t roctracer_op_code( @@ -172,9 +167,9 @@ typedef enum { Runtime API callback type: typedef void (*roctracer_rtapi_callback_t)( uint32_t domain, // runtime API domain - uint32_t cid, // API call ID + uint32_t cid, // API call ID const void* data, // [in] callback data with correlation id and the call - // arguments + // arguments void* arg); // [in/out] user passed data Enable runtime API callbacks: From b9ded61eb3c77a25b65d1c40d70ab88720be412e Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Mon, 6 Jan 2020 13:42:02 -0600 Subject: [PATCH 48/94] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f4965b92f8..71aa93aff0 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. 
## The library source tree ``` + - doc - documentation - inc/roctracer.h - rocTacer library public API header - inc/roctx.h - rocTX library puiblic API header - src - Library sources @@ -25,12 +26,11 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. ## Documentation ``` - - API description/headers: - - inc/roctracer.h - - inc/roctx.h + - API description: + - ['roctracer' profiling C API specification](doc/roctracer_spec.md) - Code examples: - - test/MatrixTranspose_test/MatrixTranspose.cpp - - test/MatrixTranspose/MatrixTranspose.cpp + - [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp) + - [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp) ``` ## To build and run test From 5d2539dd446db2a440cb43366ad44b9ef68d4ede Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Mon, 6 Jan 2020 13:42:44 -0600 Subject: [PATCH 49/94] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 71aa93aff0..e9cf8771cc 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,11 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. 
``` ## Documentation -``` - API description: - ['roctracer' profiling C API specification](doc/roctracer_spec.md) - Code examples: - [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp) - [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp) -``` ## To build and run test ``` From 6242588fb3859830f33d9e839ad520a7ce3ed069 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 7 Jan 2020 13:11:25 -0600 Subject: [PATCH 50/94] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e9cf8771cc..993f44d576 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. ## To build and run test ``` - - ROCm-2.3 or higher is required + - ROCm is required - Python2.7 is required. The required modules: CppHeaderParser, argparse. To install: From 7f9dae599224f0ada4313649379d4c10e46d9519 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 7 Jan 2020 13:13:16 -0600 Subject: [PATCH 51/94] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 993f44d576..dc18355675 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop. ## To build and run test ``` - ROCm is required + - Python2.7 is required. The required modules: CppHeaderParser, argparse. 
To install: From ee4df1f5f93f705cafe16e4c5b4b61dca0acb404 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 7 Jan 2020 17:11:22 -0600 Subject: [PATCH 52/94] Update README.md --- README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index dc18355675..00583f1a5b 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,19 @@ ROC-TX librray: code annotation evemts API Includes basic API: roctxMark, roctxRangePush, roctxRangePop. ``` +## Usage +``` +rocTracer API: + To use the rocTracer API you need the API header and to link your application with roctracer .so library: + - the API header: /opt/rocm/roctracer/include/roctracer.h + - the .so library: /opt/rocm/lib/libroctracer64.so + +rocTX API: + To use the rocTX API you need the API header and to link your application with roctx .so library: + - the API header: /opt/rocm/roctracer/include/roctx.h + - the .so library: /opt/rocm/lib/libroctx64.so +``` + ## The library source tree ``` - doc - documentation @@ -62,16 +75,3 @@ Includes basic API: roctxMark, roctxRangePush, roctxRangePop.
or make package && dpkg -i *.deb ``` - -## Usage -``` -rocTracer API: - To use the rocTracer API you need the API header and to link your application with roctracer .so librray: - - the API header: /opt/rocm/roctracer/include/roctracer.h - - the .so library: /opt/rocm/lib/libroctracer64.so - -rocTX API: - To use the rocTX API you need the API header and to link your application with roctx .so librray: - - the API header: /opt/rocm/roctracer/include/roctx.h - - the .so library: /opt/rocm/lib/libroctx64.so - From a64f0538bbe1dd025210a63e7a01979f7fc527b0 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 9 Jan 2020 17:38:58 -0600 Subject: [PATCH 53/94] test makefile fix --- build.sh | 1 - test/MatrixTranspose_test/Makefile | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/build.sh b/build.sh index b94e58da94..873631c034 100755 --- a/build.sh +++ b/build.sh @@ -21,7 +21,6 @@ if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) -MAKE_OPTS="-j 8 -C $BUILD_DIR" mkdir -p $BUILD_DIR pushd $BUILD_DIR diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index 04780535ee..d25f64340b 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -20,7 +20,7 @@ FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${ROCM_INC_PATH}/hsa -I${ROCM_INC_ ifeq ($(C_TEST), 1) COMP=gcc SOURCES = MatrixTranspose.c - FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I/opt/rocm/hcc/include + FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I${ROCM_INC_PATH}/hcc else COMP=$(HIPCC) SOURCES = MatrixTranspose.cpp From 5bfb079aff72de81c1c1ea931ce88f5203b17416 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Sun, 1 Dec 2019 13:53:22 -0800 Subject: [PATCH 54/94] roctracer changes to support multiple ROCM installation - Package is generated to install into ROCM_PATH by setting to CMAKE_INSTALL_PREFIX, if defined 
in the env otherwise default into /opt/rocm - Lib SO version is added dependent on build version - RUNPATH is set to a default value based on /opt/rocm and if ROCM_RPATH env is defined it is overwritten. - Symlinks are created for library so files. - ld.so.conf entry is done only if /opt/rocm/roctracer dir exists Signed-off-by: Pruthvi Madugundu --- CMakeLists.txt | 35 +++++++++++++++++++++++++++++++---- DEBIAN/postinst | 5 ++++- RPM/rpm_post | 5 ++++- build.sh | 5 ++++- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e90a4f7924..3a3ce25fd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,9 @@ cmake_minimum_required ( VERSION 3.5.0 ) ## Verbose output. set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE ) +# Install prefix +set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") + ## Set module name and project name. set ( ROCTRACER_NAME "roctracer" ) set ( ROCTRACER_TARGET "${ROCTRACER_NAME}64" ) @@ -43,17 +46,25 @@ include ( env ) ## Setup the package version. 
get_version ( "1.0.0" ) -message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" ) set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} ) set ( BUILD_VERSION_MINOR ${VERSION_MINOR} ) set ( BUILD_VERSION_PATCH ${VERSION_PATCH} ) -set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) + if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" ) message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" ) set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" ) endif () -set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) + +set ( LIB_VERSION_MAJOR ${BUILD_VERSION_MAJOR} ) +set ( LIB_VERSION_MINOR ${BUILD_VERSION_MINOR} ) +if (DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set (LIB_VERSION_PATCH $ENV{ROCM_LIBPATCH_VERSION} ) +else () + set (LIB_VERSION_PATCH ${BUILD_VERSION_PATCH} ) +endif() +set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" ) +message ( "-- LIB-VERSION: ${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" ) ## Set target and root/lib/test directory set ( TARGET_NAME "${ROCTRACER_TARGET}" ) @@ -66,7 +77,7 @@ include ( ${LIB_DIR}/CMakeLists.txt ) ## Set the VERSION and SOVERSION values set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" ) -set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" ) +set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) # If the library is a release, strip the target library if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) @@ -84,6 +95,10 @@ add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/include inc-link ) add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link ) +add_custom_target ( 
so-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link ) +add_custom_target ( so-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link ) ## Install information install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib ) @@ -95,19 +110,31 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext ) install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} ) install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} ) install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool ) ## rocTX set ( ROCTX_TARGET "roctx64" ) set ( ROCTX_LIBRARY "lib${ROCTX_TARGET}" ) +## Set the VERSION and SOVERSION values +set_property ( TARGET ${ROCTX_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" ) +set_property ( TARGET ${ROCTX_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) + add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) +add_custom_target ( so-roctx-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link ) +add_custom_target ( so-roctx-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND 
${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link ) install ( TARGETS "roctx64" LIBRARY DESTINATION lib ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} ) ## KFD wrapper if ( DEFINED KFD_WRAPPER ) diff --git a/DEBIAN/postinst b/DEBIAN/postinst index b09a3c139b..f14a4ee14c 100644 --- a/DEBIAN/postinst +++ b/DEBIAN/postinst @@ -3,7 +3,10 @@ set -e do_ldconfig() { - echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig + INSTALL_PATH=/opt/rocm/roctracer + if [ -e "${INSTALL_PATH}" ] ; then + echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig + fi } case "$1" in diff --git a/RPM/rpm_post b/RPM/rpm_post index a19ea861f1..1e5e279075 100644 --- a/RPM/rpm_post +++ b/RPM/rpm_post @@ -1 +1,4 @@ -echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig +INSTALL_PATH=/opt/rocm/roctracer +if [ -e "${INSTALL_PATH}" ] ; then + echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig +fi diff --git a/build.sh b/build.sh index b94e58da94..1a5aa9bcba 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,8 @@ #!/bin/bash -x SRC_DIR=`dirname $0` COMPONENT="roctracer" -ROCM_PATH="/opt/rocm" +ROCM_PATH="${ROCM_PATH:=/opt/rocm}" +LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib:/opt/rocm/lib64" fatal() { echo "$1" @@ -19,6 +20,7 @@ if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then 
PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi +if ! [ -z ${ROCM_RPATH+x} ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) MAKE_OPTS="-j 8 -C $BUILD_DIR" @@ -33,6 +35,7 @@ cmake \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \ -DCPACK_GENERATOR="DEB;RPM" \ + -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \ $HIP_VDI_OPT \ $ROCTRACER_ROOT make From eb455edfea786098b3eed8ed846ba08df34bc4e1 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:28:58 -0600 Subject: [PATCH 55/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 47 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 15b4199810..01a489f007 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -1,7 +1,8 @@ -# ROC Profiler Library Specification +# ROC Tracer Library Specification ``` The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. +Also, application code annotation rocTX API is provided. ``` ## 1. High level overview ``` @@ -19,6 +20,10 @@ The library has a C API. The rocTracer library is an API that intercepts runtime API calls and traces asynchronous activity. The activity tracing results are recorded in a ring buffer. + +The rocTX contains application code instrumentation API to support high +level correlation of runtime API/activity events. The API includes mark +and nested ranges. ``` ## 2. General API ### 2.1. 
Description @@ -122,6 +127,17 @@ Activity API: • roctracer_flush_activity[_expl] – disable activity records logging • roctracer_next_record – return next record • roctracer_get_timestamp – return correlated GPU/CPU system timestamp + +External correlation ID API: +• roctracer_activity_push_external_correlation_id - push an external + correlation id for the calling thread +• roctracer_activity_pop_external_correlation_id - pop an external + correlation id for the calling thread + +Tracing control API: +• roctracer_start – tracing start +• roctracer_stop – tracer stop + ``` ### 3.2. Tracing Domains ``` @@ -338,6 +354,35 @@ Return correlated GPU/CPU system timestamp: roctracer_status_t roctracer_get_timestamp( uint64_t* timestamp); // [out] return timestamp ``` +External correlation ID API +``` +The API provides activity records to associate rocTracer correlation IDs with +IDs provided by external APIs. The external ID records are identified by +ACTIVITY_DOMAIN_EXT_API domain value. +Using the ‘push’ method an external ID is pushed to a per CPU thread stack and +the ‘pop’ method can be used to remove the last pushed ID. +An external ID record is inserted before any generated rocTracer activity record +if the same CPU external ID stack is non-empty. + +Notifies that the calling thread is entering an external API region. +Push an external correlation id for the calling thread. +roctracer_status_t roctracer_activity_push_external_correlation_id( + activity_correlation_id_t id); // external correlation id + +Notifies that the calling thread is leaving an external API region. +Pop an external correlation id for the calling thread. +roctracer_status_t roctracer_activity_pop_external_correlation_id( + activity_correlation_id_t* last_id); // returns the last external correlation id + // if not NULL +``` +Tracing control API +``` +Tracing start: +void roctracer_start(); + +Tracing stop: +void roctracer_stop(); +``` ## 4. rocTracer Usage Code Examples ### 4.1. 
HIP API and HCC ops, GPU Activity Tracing ``` From 9b6d20eca2c6495ce93faa11ea77fd42dfce9064 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:32:22 -0600 Subject: [PATCH 56/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 01a489f007..a0f42b658c 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -728,3 +728,18 @@ void stop_tracing() { } ///////////////////////////////////////////////////////////////////////////// ``` +'rocTX' application code annotation +``` +Basic API: markers and nested ranges. +// A marker created by given ASCII massage +void roctxMark(const char* message); + +// Returns the 0 based level of a nested range being started by given message associated to this range. +// A negative value is returned on the error. +int roctxRangePush(const char* message); + +// Marks the end of a nested range. +// Returns the 0 based level the range. +// A negative value is returned on the error. +int roctxRangePop(); +``` From f261f29d1ea154199ce16c3821d563f299b175f0 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:35:16 -0600 Subject: [PATCH 57/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index a0f42b658c..87a6090309 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -1,8 +1,8 @@ # ROC Tracer Library Specification ``` -The rocTracer API is agnostic to specific runtime and may trace +- The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. -Also, application code annotation rocTX API is provided. +- The rocTX API is provided for application code annotation. ``` ## 1. 
High level overview ``` @@ -728,7 +728,7 @@ void stop_tracing() { } ///////////////////////////////////////////////////////////////////////////// ``` -'rocTX' application code annotation +## 5. rocTX application code annotation ``` Basic API: markers and nested ranges. // A marker created by given ASCII massage From 32837113f971740be8c24f40196c52dd76000c7d Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:37:57 -0600 Subject: [PATCH 58/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index 87a6090309..c49462faa5 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -1,4 +1,4 @@ -# ROC Tracer Library Specification +# ROC Tracer / ROC-TX Libraries Specification ``` - The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. From d9225b2de515cd05040c8b90d637036c809ecad7 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:40:02 -0600 Subject: [PATCH 59/94] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 00583f1a5b..497c3da895 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ rocTX API: ## Documentation - API description: - - ['roctracer' profiling C API specification](doc/roctracer_spec.md) + - ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md) - Code examples: - [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp) - [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp) From 1d29f4a6819a2f50ff6ada47f09cf0266fc06937 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:41:05 -0600 Subject: [PATCH 60/94] Update roctracer_spec.md --- 
doc/roctracer_spec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index c49462faa5..dbe6eefc50 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -728,7 +728,7 @@ void stop_tracing() { } ///////////////////////////////////////////////////////////////////////////// ``` -## 5. rocTX application code annotation +## 5. rocTX application code annotation API ``` Basic API: markers and nested ranges. // A marker created by given ASCII massage From e3e0ca8ae6f3e7112fa30dadbec029862a127695 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Sat, 11 Jan 2020 05:44:06 -0600 Subject: [PATCH 61/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index dbe6eefc50..ae73ba1acb 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -730,7 +730,7 @@ void stop_tracing() { ``` ## 5. rocTX application code annotation API ``` -Basic API: markers and nested ranges. +Basic annotation API: markers and nested ranges. 
// A marker created by given ASCII massage void roctxMark(const char* message); From 844b7bf7de7567fe24eda33502189d66dafccf2b Mon Sep 17 00:00:00 2001 From: Xiaozhu Meng Date: Mon, 13 Jan 2020 11:38:55 -0600 Subject: [PATCH 62/94] Fix link time multiple definition problem caused by function definition in header files --- inc/roctracer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/inc/roctracer.h b/inc/roctracer.h index f243267284..1db73cee77 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -179,7 +179,7 @@ roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, // tracer pool properties roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, // otherwise sets the default one if it is not set yet -roctracer_status_t roctracer_open_pool( +static inline roctracer_status_t roctracer_open_pool( const roctracer_properties_t* properties) // tracer pool properties { return roctracer_open_pool_expl(properties, NULL); @@ -189,7 +189,7 @@ roctracer_status_t roctracer_open_pool( // Close tracer memory pool roctracer_status_t roctracer_close_pool_expl( roctracer_pool_t* pool); // [in] memory pool, NULL is a default one -roctracer_status_t roctracer_close_pool() +static inline roctracer_status_t roctracer_close_pool() { return roctracer_close_pool_expl(NULL); } @@ -198,7 +198,7 @@ roctracer_status_t roctracer_close_pool() // Set new default pool if the argument is not NULL roctracer_pool_t* roctracer_default_pool_expl( roctracer_pool_t* pool); // [in] new default pool if not NULL -roctracer_pool_t* roctracer_default_pool() +static inline roctracer_pool_t* roctracer_default_pool() { return roctracer_default_pool_expl(NULL); } @@ -208,7 +208,7 @@ roctracer_status_t roctracer_enable_op_activity_expl( activity_domain_t domain, // tracing domain uint32_t op, // activity op ID roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_op_activity( +static 
inline roctracer_status_t roctracer_enable_op_activity( activity_domain_t domain, // tracing domain uint32_t op) // activity op ID { @@ -217,14 +217,14 @@ roctracer_status_t roctracer_enable_op_activity( roctracer_status_t roctracer_enable_domain_activity_expl( activity_domain_t domain, // tracing domain roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_domain_activity( +static inline roctracer_status_t roctracer_enable_domain_activity( activity_domain_t domain) // tracing domain { return roctracer_enable_domain_activity_expl(domain, NULL); } roctracer_status_t roctracer_enable_activity_expl( roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_activity() +static inline roctracer_status_t roctracer_enable_activity() { return roctracer_enable_activity_expl(NULL); } @@ -240,7 +240,7 @@ roctracer_status_t roctracer_disable_activity(); // Flush available activity records roctracer_status_t roctracer_flush_activity_expl( roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_flush_activity() +static inline roctracer_status_t roctracer_flush_activity() { return roctracer_flush_activity_expl(NULL); } From f8e0039f39367486f670037eb0074ef889caabc0 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 13 Jan 2020 12:07:11 -0600 Subject: [PATCH 63/94] cosmetic change --- build.sh | 4 ++-- inc/roctracer_ext.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index 5ab8596620..6d04e7b2a0 100755 --- a/build.sh +++ b/build.sh @@ -2,7 +2,7 @@ SRC_DIR=`dirname $0` COMPONENT="roctracer" ROCM_PATH="${ROCM_PATH:=/opt/rocm}" -LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib:/opt/rocm/lib64" +LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64" fatal() { echo "$1" @@ -20,7 +20,7 @@ if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; 
then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi -if ! [ -z ${ROCM_RPATH+x} ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi +if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) diff --git a/inc/roctracer_ext.h b/inc/roctracer_ext.h index 2427336c7b..172966af6d 100644 --- a/inc/roctracer_ext.h +++ b/inc/roctracer_ext.h @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus //////////////////////////////////////////////////////////////////////////////// -// Application annotatin API +// Application annotation API // Tracing start API void roctracer_start(); From abe07f7b4d00077759d806ab012eeb3620dedff4 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 13 Jan 2020 12:19:04 -0600 Subject: [PATCH 64/94] fixing link errors --- inc/roctracer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/inc/roctracer.h b/inc/roctracer.h index f243267284..1db73cee77 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -179,7 +179,7 @@ roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, // tracer pool properties roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, // otherwise sets the default one if it is not set yet -roctracer_status_t roctracer_open_pool( +static inline roctracer_status_t roctracer_open_pool( const roctracer_properties_t* properties) // tracer pool properties { return roctracer_open_pool_expl(properties, NULL); @@ -189,7 +189,7 @@ roctracer_status_t roctracer_open_pool( // Close tracer memory pool roctracer_status_t roctracer_close_pool_expl( roctracer_pool_t* pool); // [in] memory pool, NULL is a default one -roctracer_status_t roctracer_close_pool() +static inline roctracer_status_t roctracer_close_pool() { return 
roctracer_close_pool_expl(NULL); } @@ -198,7 +198,7 @@ roctracer_status_t roctracer_close_pool() // Set new default pool if the argument is not NULL roctracer_pool_t* roctracer_default_pool_expl( roctracer_pool_t* pool); // [in] new default pool if not NULL -roctracer_pool_t* roctracer_default_pool() +static inline roctracer_pool_t* roctracer_default_pool() { return roctracer_default_pool_expl(NULL); } @@ -208,7 +208,7 @@ roctracer_status_t roctracer_enable_op_activity_expl( activity_domain_t domain, // tracing domain uint32_t op, // activity op ID roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_op_activity( +static inline roctracer_status_t roctracer_enable_op_activity( activity_domain_t domain, // tracing domain uint32_t op) // activity op ID { @@ -217,14 +217,14 @@ roctracer_status_t roctracer_enable_op_activity( roctracer_status_t roctracer_enable_domain_activity_expl( activity_domain_t domain, // tracing domain roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_domain_activity( +static inline roctracer_status_t roctracer_enable_domain_activity( activity_domain_t domain) // tracing domain { return roctracer_enable_domain_activity_expl(domain, NULL); } roctracer_status_t roctracer_enable_activity_expl( roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_activity() +static inline roctracer_status_t roctracer_enable_activity() { return roctracer_enable_activity_expl(NULL); } @@ -240,7 +240,7 @@ roctracer_status_t roctracer_disable_activity(); // Flush available activity records roctracer_status_t roctracer_flush_activity_expl( roctracer_pool_t* pool); // memory pool, NULL is a default one -roctracer_status_t roctracer_flush_activity() +static inline roctracer_status_t roctracer_flush_activity() { return roctracer_flush_activity_expl(NULL); } From 127cba886ea9f61e0d625fe10d1cb2373304a927 Mon Sep 17 00:00:00 2001 From: 
eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 14 Jan 2020 10:31:42 -0600 Subject: [PATCH 65/94] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 497c3da895..eeaac8a858 100644 --- a/README.md +++ b/README.md @@ -48,8 +48,7 @@ rocTX API: ``` - ROCm is required - - Python2.7 is required. - The required modules: CppHeaderParser, argparse. + - Python modules requirement: CppHeaderParser, argparse. To install: sudo pip install CppHeaderParser argparse From 1a5609c35dc980b6d7ecd0dd29ade1dd0437cb53 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Tue, 14 Jan 2020 10:32:05 -0600 Subject: [PATCH 66/94] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eeaac8a858..8f3e848113 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ rocTX API: ``` - ROCm is required - - Python modules requirement: CppHeaderParser, argparse. + - Python modules requirements: CppHeaderParser, argparse. 
To install: sudo pip install CppHeaderParser argparse From 1c79061bb38341eb2d79da54a76bbed22a902155 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 15 Jan 2020 12:57:28 -0600 Subject: [PATCH 67/94] filtering start/stop callback if already started/stopped --- src/core/roctracer.cpp | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index c006e6d88c..7221be3460 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -526,11 +526,25 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( return status; } +// Logger routines and primitives util::Logger::mutex_t util::Logger::mutex_; std::atomic util::Logger::instance_{}; + +// Memory pool routines and primitives MemoryPool* memory_pool = NULL; typedef std::recursive_mutex memory_pool_mutex_t; memory_pool_mutex_t memory_pool_mutex; + +// Stop sttaus routines and primitives +unsigned stop_status_value = 0; +typedef std::mutex stop_status_mutex_t; +stop_status_mutex_t stop_status_mutex; +unsigned set_stopped(unsigned val) { + std::lock_guard lock(stop_status_mutex); + const unsigned ret = (stop_status_value ^ val); + stop_status_value = val; + return ret; +} } // namespace roctracer LOADER_INSTANTIATE(); @@ -1015,16 +1029,20 @@ PUBLIC_API void roctracer_mark(const char* str) { // Start API PUBLIC_API void roctracer_start() { - if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); - roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); - roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); + if (roctracer::set_stopped(0)) { + if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); + roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); + 
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); + } } // Stop API PUBLIC_API void roctracer_stop() { - roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); - roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); - if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); + if (roctracer::set_stopped(1)) { + roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); + if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); + } } // Set properties From 36a2df3edf5e37961d5688ab6b37d72df6d3f218 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 15 Jan 2020 15:24:44 -0600 Subject: [PATCH 68/94] roctx start/stop minor changes, code cleanup --- test/tool/tracer_tool.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 78751a7463..75c72dd09f 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -188,22 +188,17 @@ void roctx_api_callback( roctx_callback_fun(domain, cid, GetTid(), data->args.message); } -// Start/Stop callbacks -void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) { - const bool* is_stop_ptr = (bool*)arg; - const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA; - const char* message = (*is_stop_ptr == true) ? 
NULL : data->message; - roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message); +// rocTX Start/Stop callbacks +void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) { + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, data->message); } -void stop_callback() { - bool is_stop = true; - roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); -} -void start_callback() { - bool is_stop = false; - roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); +void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) { + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, NULL); } +void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); } +void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); } +// rocTX buffer flush function void roctx_flush_cb(roctx_trace_entry_t* entry) { std::ostringstream os; os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; From 547b36f9f6b3f9fbed8f904f1ddac28ddd2d412f Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Fri, 17 Jan 2020 11:23:01 -0500 Subject: [PATCH 69/94] Update gen_ostream_ops.py --- script/gen_ostream_ops.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py index fe3c86e364..895e980ed6 100644 --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -122,7 +122,7 @@ def process_struct(f,c,cppHeader,nname,apiname): process_struct(f,nc,cppHeader,name,apiname) -def gen_cppheader(infilepath,outfilepath): +def gen_cppheader(infilepath, includes, outfilepath): try: cppHeader = CppHeaderParser.CppHeader(infilepath) except CppHeaderParser.CppParseError as e: @@ -146,10 
+146,8 @@ def gen_cppheader(infilepath,outfilepath): '#include \n' + \ '\n' + \ '#include "roctracer.h"\n' - if apiname == "KFD": - HEADER_S += '#include "hsakmt.h"\n' - if apiname == "HSA": - HEADER_S += '#include \n#include \n#include \n #include "cb_table.h"\n' + for w in includes.split(','): + HEADER_S += '#include "' + w + '"\n' f.write(HEADER_S) f.write('\n') f.write('namespace roctracer {\n') @@ -180,7 +178,6 @@ def gen_cppheader(infilepath,outfilepath): '#endif // INC_BASIC_OSTREAM_OPS_H_\n' + \ ' \n' f.write(FOOTER) - f.close() f2.close() print('File ' + outfilepath + ' generated') @@ -191,10 +188,11 @@ def gen_cppheader(infilepath,outfilepath): parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.') requiredNamed = parser.add_argument_group('Required arguments') requiredNamed.add_argument('-in','--in', help='Header file to be parsed', required=True) +requiredNamed.add_argument('-includes','--inc', help='Comma separated list of include file names', required=True) requiredNamed.add_argument('-out','--out', help='Output file with ostream operators', required=True) args = vars(parser.parse_args()) if __name__ == '__main__': - gen_cppheader(args['in'],args['out']) + gen_cppheader(args['in'],args['inc'],args['out']) From fff5d9833fef18510e2e36fce1721796c571fb21 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Wed, 22 Jan 2020 14:06:58 -0500 Subject: [PATCH 70/94] Update gen_ostream_ops.py --- script/gen_ostream_ops.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py index 895e980ed6..2533c6d42b 100644 --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -187,12 +187,11 @@ def gen_cppheader(infilepath, includes, outfilepath): parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input 
file.') requiredNamed = parser.add_argument_group('Required arguments') -requiredNamed.add_argument('-in','--in', help='Header file to be parsed', required=True) -requiredNamed.add_argument('-includes','--inc', help='Comma separated list of include file names', required=True) -requiredNamed.add_argument('-out','--out', help='Output file with ostream operators', required=True) +requiredNamed.add_argument('-in', metavar='file', help='Header file to be parsed', required=True) +requiredNamed.add_argument('-includes', metavar='list', help='Comma separated list of include file names', required=True) +requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True) args = vars(parser.parse_args()) if __name__ == '__main__': - gen_cppheader(args['in'],args['inc'],args['out']) - + gen_cppheader(args['in'],args['includes'],args['out']) From cf2c9a2e6d354cc7d02cc7887993a9b9f60334ae Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Wed, 22 Jan 2020 14:12:22 -0500 Subject: [PATCH 71/94] Update CMakeLists.txt --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44b9fd81a3..a3499b0859 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,7 +22,7 @@ if ( DEFINED KFD_WRAPPER ) set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -includes hsakmt.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) 
From 8539272de861f38d451bbad155e1fae00e7b82b7 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Thu, 23 Jan 2020 17:07:42 -0600 Subject: [PATCH 72/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index ae73ba1acb..be6dcfb367 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -1,5 +1,8 @@ # ROC Tracer / ROC-TX Libraries Specification ``` +ROC Tracer API version 2 +ROCTX API version 1 + - The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. - The rocTX API is provided for application code annotation. From fc7826533c3e49ee32d5c390b7b99dcdf8b4e920 Mon Sep 17 00:00:00 2001 From: eshcherb <33529668+eshcherb@users.noreply.github.com> Date: Thu, 23 Jan 2020 17:08:19 -0600 Subject: [PATCH 73/94] Update roctracer_spec.md --- doc/roctracer_spec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md index be6dcfb367..28f8ffcc27 100644 --- a/doc/roctracer_spec.md +++ b/doc/roctracer_spec.md @@ -1,7 +1,7 @@ # ROC Tracer / ROC-TX Libraries Specification ``` ROC Tracer API version 2 -ROCTX API version 1 +ROC-TX API version 1 - The rocTracer API is agnostic to specific runtime and may trace the runtime API calls and asynchronous GPU activity. 
From 2901da60a5d0c2e4fdbf3782b3a77f0ed16f6e7f Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Fri, 24 Jan 2020 10:19:09 -0500 Subject: [PATCH 74/94] Update gen_ostream_ops.py --- script/gen_ostream_ops.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py index 2533c6d42b..38f7e757cd 100644 --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -121,8 +121,7 @@ def process_struct(f,c,cppHeader,nname,apiname): nc = c+"::" process_struct(f,nc,cppHeader,name,apiname) - -def gen_cppheader(infilepath, includes, outfilepath): +def gen_cppheader(infilepath, outfilepath): try: cppHeader = CppHeaderParser.CppHeader(infilepath) except CppHeaderParser.CppParseError as e: @@ -146,8 +145,6 @@ def gen_cppheader(infilepath, includes, outfilepath): '#include \n' + \ '\n' + \ '#include "roctracer.h"\n' - for w in includes.split(','): - HEADER_S += '#include "' + w + '"\n' f.write(HEADER_S) f.write('\n') f.write('namespace roctracer {\n') @@ -188,10 +185,9 @@ def gen_cppheader(infilepath, includes, outfilepath): parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.') requiredNamed = parser.add_argument_group('Required arguments') requiredNamed.add_argument('-in', metavar='file', help='Header file to be parsed', required=True) -requiredNamed.add_argument('-includes', metavar='list', help='Comma separated list of include file names', required=True) requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True) args = vars(parser.parse_args()) if __name__ == '__main__': - gen_cppheader(args['in'],args['includes'],args['out']) + gen_cppheader(args['in'],args['out']) From d30aabefe68c4af4bef58707044fb64396467c0c Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Fri, 24 Jan 2020 10:20:40 -0500 
Subject: [PATCH 75/94] Update CMakeLists.txt --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3499b0859..44b9fd81a3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,7 +22,7 @@ if ( DEFINED KFD_WRAPPER ) set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -includes hsakmt.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) From d87ff09280c3e5c56d717c09639d4163e39ed78e Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Mon, 27 Jan 2020 11:58:20 -0500 Subject: [PATCH 76/94] Update CMakeLists.txt --- src/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44b9fd81a3..072ded4994 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,7 +22,8 @@ if ( DEFINED KFD_WRAPPER ) set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) + execute_process ( COMMAND sh -xc "gcc -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${ROOT_DIR}/inc/hsakmttypes_pp.h" ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${ROOT_DIR}/inc/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) target_include_directories ( ${KFD_LIB} PRIVATE 
${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) From f214221bb26190381d56a3555cce51130672f1fe Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 27 Jan 2020 14:30:44 -0600 Subject: [PATCH 77/94] control of trace buffer thread start --- inc/roctracer.h | 12 +--- src/core/roctracer.cpp | 37 ++++++------ src/core/trace_buffer.h | 116 ++++++++++++++++++++++++++++++-------- test/tool/tracer_tool.cpp | 105 +++++++++++++++++----------------- 4 files changed, 169 insertions(+), 101 deletions(-) diff --git a/inc/roctracer.h b/inc/roctracer.h index 1db73cee77..deffb0f6d3 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -250,20 +250,14 @@ roctracer_status_t roctracer_get_timestamp( uint64_t* timestamp); // [out] return timestamp // Load/Unload methods +bool roctracer_load(); +void roctracer_unload(); + // Set properties roctracer_status_t roctracer_set_properties( roctracer_domain_t domain, // tracing domain void* propertes); // tracing properties -typedef struct HsaApiTable HsaApiTable; -bool roctracer_load( - HsaApiTable* table, - uint64_t runtime_version, - uint64_t failed_tool_count, - const char* const* failed_tool_names); - -void roctracer_unload(bool destruct); - #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 7221be3460..b975117ae6 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -548,6 +548,7 @@ unsigned set_stopped(unsigned val) { } // namespace roctracer LOADER_INSTANTIATE(); +TRACE_BUFFER_INSTANTIATE(); /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods @@ -989,6 +990,7 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); 
memory_pool->Flush(); + roctracer::TraceBufferBase::FlushAll(); API_METHOD_SUFFIX } @@ -1045,6 +1047,12 @@ PUBLIC_API void roctracer_stop() { } } +PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) { + API_METHOD_PREFIX + *timestamp = util::HsaRsrcFactory::Instance().TimestampNs(); + API_METHOD_SUFFIX +} + // Set properties PUBLIC_API roctracer_status_t roctracer_set_properties( roctracer_domain_t domain, @@ -1053,6 +1061,8 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { + roctracer::trace_buffer.StartWorkerThread(); + // HSA OPS properties roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); HsaApiTable* table = reinterpret_cast(ops_properties->table); @@ -1112,11 +1122,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( API_METHOD_SUFFIX } -// HSA-runtime tool on-load method -PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - ONLOAD_TRACE_BEG(); +PUBLIC_API bool roctracer_load() { static bool is_loaded = false; + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); + if (is_loaded) return true; is_loaded = true; @@ -1124,11 +1133,10 @@ PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uin return true; } -PUBLIC_API void roctracer_unload(bool destruct) { +PUBLIC_API void roctracer_unload() { static bool is_unloaded = false; - ONLOAD_TRACE("begin (" << destruct << ", " << is_unloaded << ")"); + ONLOAD_TRACE("begin, unloaded(" << is_unloaded << ")"); - if (destruct == false) return; if (is_unloaded == true) return; is_unloaded = true; @@ -1137,23 +1145,16 @@ PUBLIC_API void roctracer_unload(bool destruct) { ONLOAD_TRACE_END(); } -PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) { - API_METHOD_PREFIX - *timestamp = util::HsaRsrcFactory::Instance().TimestampNs(); - API_METHOD_SUFFIX 
-} - +// HSA-runtime tool on-load/unload methods PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { ONLOAD_TRACE_BEG(); - const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); + const bool ret = roctracer_load(); ONLOAD_TRACE_END(); return ret; } PUBLIC_API void OnUnload() { - ONLOAD_TRACE_BEG(); - roctracer_unload(false); - ONLOAD_TRACE_END(); + ONLOAD_TRACE("done"); } CONSTRUCTOR_API void constructor() { @@ -1166,7 +1167,7 @@ CONSTRUCTOR_API void constructor() { DESTRUCTOR_API void destructor() { ONLOAD_TRACE_BEG(); - roctracer_unload(true); + roctracer_unload(); util::HsaRsrcFactory::Destroy(); roctracer::util::Logger::Destroy(); ONLOAD_TRACE_END(); diff --git a/src/core/trace_buffer.h b/src/core/trace_buffer.h index 0cbcb5bdda..05bc3c47b6 100644 --- a/src/core/trace_buffer.h +++ b/src/core/trace_buffer.h @@ -2,12 +2,23 @@ #define SRC_CORE_TRACE_BUFFER_H_ #include +#include #include #include +#include + #include #include #include +#define FATAL(stream) \ + do { \ + std::ostringstream oss; \ + oss << __FUNCTION__ << "(), " << stream; \ + std::cout << oss.str() << std::endl; \ + abort(); \ + } while (0) + #define PTHREAD_CALL(call) \ do { \ int err = call; \ @@ -53,8 +64,55 @@ struct trace_entry_t { }; }; +template +struct push_element_fun { + T* const elem_; + void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; } + push_element_fun(T* elem) : elem_(elem) {} +}; + +template +struct call_element_fun { + void (T::*fptr_)(); + void fun(T* node) { (node->*fptr_)(); } + call_element_fun(void (T::*f)()) : fptr_(f) {} +}; + +struct TraceBufferBase { + typedef std::mutex mutex_t; + + virtual void StartWorkerThread() = 0; + virtual void Flush() = 0; + + static void StartWorkerThreadAll() { foreach(call_element_fun(&TraceBufferBase::StartWorkerThread)); } + static void FlushAll() { 
foreach(call_element_fun(&TraceBufferBase::Flush)); } + + static void Push(TraceBufferBase* elem) { + if (head_elem_ == NULL) head_elem_ = elem; + else foreach(push_element_fun(elem)); + } + + TraceBufferBase() : next_elem_(NULL) {} + + template + static void foreach(const F& f_in) { + std::lock_guard lck(mutex_); + F f = f_in; + TraceBufferBase* p = head_elem_; + while (p != NULL) { + TraceBufferBase* next = p->next_elem_; + f.fun(p); + p = next; + } + } + + TraceBufferBase* next_elem_; + static TraceBufferBase* head_elem_; + static mutex_t mutex_; +}; + template -class TraceBuffer { +class TraceBuffer : protected TraceBufferBase { public: typedef void (*callback_t)(Entry*); typedef TraceBuffer Obj; @@ -67,7 +125,8 @@ class TraceBuffer { }; TraceBuffer(const char* name, uint32_t size, flush_prm_t* flush_prm_arr, uint32_t flush_prm_count) : - is_flushed_(false) + is_flushed_(false), + work_thread_started_(false) { name_ = strdup(name); size_ = size; @@ -80,31 +139,43 @@ class TraceBuffer { flush_prm_arr_ = flush_prm_arr; flush_prm_count_ = flush_prm_count; - PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL)); - PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL)); - PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this)); + TraceBufferBase::Push(this); } ~TraceBuffer() { - PTHREAD_CALL(pthread_cancel(work_thread_)); - void *res; - PTHREAD_CALL(pthread_join(work_thread_, &res)); - if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly"); - + StopWorkerThread(); Flush(); } + void StartWorkerThread() { + std::lock_guard lck(mutex_); + if (work_thread_started_ == false) { + PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL)); + PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL)); + PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this)); + work_thread_started_ = true; + } + } + + void StopWorkerThread() { + std::lock_guard lck(mutex_); + if (work_thread_started_ == true) { + 
PTHREAD_CALL(pthread_cancel(work_thread_)); + void *res; + PTHREAD_CALL(pthread_join(work_thread_, &res)); + if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly"); + work_thread_started_ = false; + } + } Entry* GetEntry() { const pointer_t pointer = read_pointer_.fetch_add(1); if (pointer >= end_pointer_) wrap_buffer(pointer); - if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap"); + if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap"); return data_ + (pointer + size_ - end_pointer_); } - void Flush() { - flush_buf(); - } + void Flush() { flush_buf(); } private: void flush_buf() { @@ -134,7 +205,7 @@ class TraceBuffer { inline Entry* allocate_fun() { Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry)); - if (ptr == NULL) abort_run("TraceBuffer::allocate_fun: calloc failed"); + if (ptr == NULL) FATAL("malloc failed"); //memset(ptr, 0, size_ * sizeof(Entry)); return ptr; } @@ -156,24 +227,20 @@ class TraceBuffer { void wrap_buffer(const pointer_t pointer) { std::lock_guard lck(mutex_); + if (work_thread_started_ == false) FATAL("worker thread is not started"); + PTHREAD_CALL(pthread_mutex_lock(&work_mutex_)); if (pointer >= end_pointer_) { data_ = next_; next_ = NULL; PTHREAD_CALL(pthread_cond_signal(&work_cond_)); end_pointer_ += size_; - if (end_pointer_ == 0) abort_run("TraceBuffer::wrap_buffer: pointer overflow"); + if (end_pointer_ == 0) FATAL("pointer overflow"); buf_list_.push_back(data_); } PTHREAD_CALL(pthread_mutex_unlock(&work_mutex_)); } - void abort_run(const char* str) { - fprintf(stderr, "%s\n", str); - fflush(stderr); - abort(); - } - const char* name_; uint32_t size_; Entry* data_; @@ -189,9 +256,14 @@ class TraceBuffer { pthread_t work_thread_; pthread_mutex_t work_mutex_; pthread_cond_t work_cond_; + bool work_thread_started_; mutex_t mutex_; }; } // namespace roctracer +#define TRACE_BUFFER_INSTANTIATE() \ + roctracer::TraceBufferBase* 
roctracer::TraceBufferBase::head_elem_ = NULL; \ + roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_; + #endif // SRC_CORE_TRACE_BUFFER_H_ diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 75c72dd09f..fc79195f6f 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -77,6 +77,7 @@ bool trace_hip_activity = false; bool trace_kfd = false; LOADER_INSTANTIATE(); +TRACE_BUFFER_INSTANTIATE(); // Global output file handle FILE* roctx_file_handle = NULL; @@ -524,9 +525,55 @@ void close_output_file(FILE* file_handle) { if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle); } +// tool unload method +void tool_unload() { + static bool is_unloaded = false; + ONLOAD_TRACE("begin, unloaded(" << is_unloaded << ")"); + + if (is_unloaded == true) return; + is_unloaded = true; + + roctracer_unload(); + + if (trace_roctx) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); + + roctx_trace_buffer.Flush(); + close_output_file(roctx_file_handle); + } + if (trace_hsa_api) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); + + hsa_api_trace_buffer.Flush(); + close_output_file(hsa_api_file_handle); + } + if (trace_hsa_activity) { + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + + close_output_file(hsa_async_copy_file_handle); + } + if (trace_hip_api || trace_hip_activity) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + ROCTRACER_CALL(roctracer_flush_activity()); + ROCTRACER_CALL(roctracer_close_pool()); + + hip_api_trace_buffer.Flush(); + close_output_file(hip_api_file_handle); + close_output_file(hcc_activity_file_handle); + } + + if (trace_kfd) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); + 
fclose(kfd_api_file_handle); + } + ONLOAD_TRACE_END(); +} + // HSA-runtime tool on-load method extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { + const char* const* failed_tool_names) { ONLOAD_TRACE_BEG(); timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); @@ -758,61 +805,15 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } + roctracer::TraceBufferBase::StartWorkerThreadAll(); + const bool ret = roctracer_load(); ONLOAD_TRACE_END(); - return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); -} - -// tool unload method -void tool_unload(bool destruct) { - static bool is_unloaded = false; - ONLOAD_TRACE("begin (" << destruct <<", " << is_unloaded << ")"); - - if (destruct == false) return; - if (is_unloaded == true) return; - is_unloaded = true; - roctracer_unload(destruct); - - if (trace_roctx) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); - - roctx_trace_buffer.Flush(); - close_output_file(roctx_file_handle); - } - if (trace_hsa_api) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); - - hsa_api_trace_buffer.Flush(); - close_output_file(hsa_api_file_handle); - } - if (trace_hsa_activity) { - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); - - close_output_file(hsa_async_copy_file_handle); - } - if (trace_hip_api || trace_hip_activity) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_flush_activity()); - ROCTRACER_CALL(roctracer_close_pool()); - - hip_api_trace_buffer.Flush(); - close_output_file(hip_api_file_handle); - close_output_file(hcc_activity_file_handle); - } - - 
if (trace_kfd) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); - fclose(kfd_api_file_handle); - } - ONLOAD_TRACE_END(); + return ret; } // HSA-runtime on-unload method extern "C" PUBLIC_API void OnUnload() { - ONLOAD_TRACE_BEG(); - tool_unload(false); - ONLOAD_TRACE_END(); + ONLOAD_TRACE(""); } extern "C" CONSTRUCTOR_API void constructor() { @@ -820,6 +821,6 @@ extern "C" CONSTRUCTOR_API void constructor() { } extern "C" DESTRUCTOR_API void destructor() { ONLOAD_TRACE_BEG(); - tool_unload(true); + tool_unload(); ONLOAD_TRACE_END(); } From c73b98c2c536de18852f0a16ce8da001e6339530 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 27 Jan 2020 20:42:22 -0600 Subject: [PATCH 78/94] periodic trace flushing --- test/run.sh | 1 + test/tool/tracer_tool.cpp | 38 ++++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/test/run.sh b/test/run.sh index 7b7d5109b3..007ee8ffeb 100755 --- a/test/run.sh +++ b/test/run.sh @@ -81,6 +81,7 @@ eval_test "tool SYS/HSA test" ./test/MatrixTranspose # Tracing control export ROCTRACER_DOMAIN="hip" eval_test "tool period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" +eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" # HSA test export ROCTRACER_DOMAIN="hsa" diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index fc79195f6f..ba1f117bf1 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -123,19 +123,19 @@ void* control_thr_fun(void*) { const uint32_t len_us = control_len_us % 1000000; const uint32_t dist_sec = control_dist_us / 1000000; const uint32_t dist_us = control_dist_us % 1000000; - bool start = true; + bool to_start = true; sleep(delay_sec); usleep(delay_us); while (1) { - if (start) { - start = false; + if (to_start) { + to_start = false; roctracer_start(); sleep(len_sec); usleep(len_us); } else { - start = true; + to_start = true; roctracer_stop(); sleep(dist_sec); 
usleep(dist_us); @@ -143,6 +143,20 @@ void* control_thr_fun(void*) { } } +// Flushing control thread +uint32_t control_flush_us = 0; +void* flush_thr_fun(void*) { + const uint32_t dist_sec = control_flush_us / 1000000; + const uint32_t dist_us = control_flush_us % 1000000; + + while (1) { + sleep(dist_sec); + usleep(dist_us); + roctracer_flush_activity(); + roctracer::TraceBufferBase::FlushAll(); + } +} + /////////////////////////////////////////////////////////////////////////////////////////////////////// // rocTX annotation tracing @@ -784,6 +798,22 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } + const char* flush_str = getenv("ROCP_FLUSH_RATE"); + if (flush_str != NULL) { + sscanf(flush_str, "%d", &control_flush_us); + if (control_flush_us == 0) { + fprintf(stderr, "ROCTracer: control flush rate bad value\n"); + abort(); + } + + fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout); + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, flush_thr_fun, NULL); + } + // Enable KFD API callbacks/activity if (trace_kfd) { kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); From e8e41428c71d4666082ee8dea5e411bd2803c447 Mon Sep 17 00:00:00 2001 From: rkebichi <54912798+rkebichi@users.noreply.github.com> Date: Tue, 28 Jan 2020 12:48:14 -0500 Subject: [PATCH 79/94] Update CMakeLists.txt --- src/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 072ded4994..0bdb1af43e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,8 +22,8 @@ if ( DEFINED KFD_WRAPPER ) set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) - execute_process ( COMMAND sh -xc "gcc -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${ROOT_DIR}/inc/hsakmttypes_pp.h" ) - execute_process ( COMMAND 
sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${ROOT_DIR}/inc/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) + execute_process ( COMMAND sh -xc "gcc -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) From 89b292ee5c8a495a5a7220e48c209de4de769324 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 28 Jan 2020 14:21:39 -0600 Subject: [PATCH 80/94] using CMAKE_CXX_COMPILER intead of gcc --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cdbe606fe5..2dd3ed11d0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,7 @@ set ( KFD_LIB "kfdwrapper64" ) set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) -execute_process ( COMMAND sh -xc "gcc -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" ) +execute_process ( COMMAND sh -xc "${CMAKE_CXX_COMPILER} -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) From 50b47449aa823e42aa6929493a90ca2958f24ade Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 22:21:22 -0600 Subject: [PATCH 81/94] git ignore generated inc/basic_ostream_ops.h --- .gitignore | 
1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bd206b0038..446848fea5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ b build inc/hsa_prof_str.h inc/kfd_prof_str.h +inc/basic_ostream_ops.h inc/kfd_ostream_ops.h test/hsa test/MatrixTranspose/MatrixTranspose From 05315465cfa0510558caa418b4cb678e9ea8d674 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 22:39:22 -0600 Subject: [PATCH 82/94] hsa_rsrc_factory sync --- src/util/hsa_rsrc_factory.cpp | 90 +++++++++++++++++++++++++----- src/util/hsa_rsrc_factory.h | 100 +++++++++++++++++++++++++++++++++- 2 files changed, 176 insertions(+), 14 deletions(-) diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index ccb1cd9de3..e1ef92683e 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -44,9 +44,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#include "util/exception.h" -#include "util/logger.h" - namespace util { // Callback function to get available in the system agents @@ -149,6 +146,11 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CHECK_STATUS("HSA timer allocation failed", (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + // Time correlation + const uint32_t corr_iters = 1000; + CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters); + CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); + // System timeout timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? 
timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } @@ -192,6 +194,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; + hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; @@ -230,6 +234,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; hsa_api_.hsa_executable_freeze = hsa_executable_freeze; hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; + hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; hsa_api_.hsa_system_get_info = hsa_system_get_info; hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; @@ -336,6 +342,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); + // GFX8 and GFX9 SGPR/VGPR block sizes + agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 
1 : 2; + agent_info->sgpr_block_size = 8; + agent_info->vgpr_block_size = 4; + // Set GPU index agent_info->dev_index = gpu_list_.size(); gpu_list_.push_back(agent_info); @@ -508,22 +519,25 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s } // Wait signal -void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + const hsa_signal_value_t exp_value = signal_value - 1; + hsa_signal_value_t ret_value = signal_value; while (1) { - const hsa_signal_value_t signal_value = - hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); - if (signal_value == 0) { - break; - } else { - if (signal_value == 1) WARN_LOGGING("signal waiting..."); - else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); + ret_value = + hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + if (ret_value == exp_value) break; + if (ret_value != signal_value) { + std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value + << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + abort(); } } + return ret_value; } // Wait signal with signal value restore void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { - SignalWait(signal); + SignalWait(signal, signal_value); hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } @@ -536,7 +550,7 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src CHECK_STATUS("hsa_signal_create()", status); status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); CHECK_STATUS("hsa_amd_memory_async_copy()", status); - SignalWait(s); + SignalWait(s, 1); status = hsa_api_.hsa_signal_destroy(s); 
CHECK_STATUS("hsa_signal_destroy()", status); } @@ -680,9 +694,59 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s return write_idx; } +const char* HsaRsrcFactory::GetKernelName(uint64_t addr) { + std::lock_guard lck(mutex_); + const auto it = symbols_map_->find(addr); + if (it == symbols_map_->end()) { + fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); + abort(); + } + return strdup(it->second); +} + +void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { + std::lock_guard lck(mutex_); + executable_tracking_on_ = true; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; +} + +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { + hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; + hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + CHECK_STATUS("Error in getting symbol info", status); + if (value == HSA_SYMBOL_KIND_KERNEL) { + uint64_t addr = 0; + uint32_t len = 0; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + CHECK_STATUS("Error in getting kernel object", status); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + CHECK_STATUS("Error in getting name len", status); + char *name = new char[len + 1]; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); + CHECK_STATUS("Error in getting kernel name", status); + name[len] = 0; + auto ret = symbols_map_->insert({addr, name}); + if (ret.second == false) { + delete[] ret.first->second; + ret.first->second = name; + } + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { + std::lock_guard lck(mutex_); + if (symbols_map_ == NULL) 
symbols_map_ = new symbols_map_t; + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + CHECK_STATUS("Error in iterating executable symbols", status); + return hsa_api_.hsa_executable_freeze(executable, options);; +} + std::atomic HsaRsrcFactory::instance_{}; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; hsa_pfn_t HsaRsrcFactory::hsa_api_{}; +bool HsaRsrcFactory::executable_tracking_on_ = false; +HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL; } // namespace util diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 51824a5212..c52715d4e6 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -94,6 +95,8 @@ struct hsa_pfn_t { decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; decltype(hsa_executable_freeze)* hsa_executable_freeze; decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; + decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; + decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; decltype(hsa_system_get_info)* hsa_system_get_info; decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; @@ -159,6 +162,11 @@ struct AgentInfo { // Number of Shader Arrays Per Shader Engines in Gpu uint32_t shader_arrays_per_se; + + // SGPR/VGPR block sizes + uint32_t sgpr_block_dflt; + uint32_t sgpr_block_size; + uint32_t vgpr_block_size; }; // HSA timer class @@ -169,6 +177,12 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; + enum time_id_t { + TIME_ID_CLOCK_REALTIME = 0, + TIME_ID_CLOCK_MONOTONIC = 1, + TIME_ID_NUMBER + }; + HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t 
sysclock_hz = 0; hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); @@ -184,6 +198,11 @@ class HsaTimer { return timestamp_t((freq_t)time / sysclock_factor_); } + // Method for timespec/ns conversion + timestamp_t timespec_to_ns(const timespec& time) const { + return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; + } + // Return timestamp in 'ns' timestamp_t timestamp_ns() const { timestamp_t sysclock; @@ -192,6 +211,54 @@ class HsaTimer { return sysclock_to_ns(sysclock); } + // Return time in 'ns' + timestamp_t clocktime_ns(clockid_t clock_id) const { + timespec time; + clock_gettime(clock_id, &time); + return timespec_to_ns(time); + } + + // Return pair of correlated values of profiling timestamp and time with + // correlation error for a given time ID and number of iterations + void correlated_pair_ns(time_id_t time_id, uint32_t iters, + timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) { + clockid_t clock_id = 0; + switch (clock_id) { + case TIME_ID_CLOCK_REALTIME: + clock_id = CLOCK_REALTIME; + break; + case TIME_ID_CLOCK_MONOTONIC: + clock_id = CLOCK_MONOTONIC; + break; + default: + CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); + } + + std::vector ts_vec(iters); + std::vector tm_vec(iters); + const uint32_t steps = iters - 1; + + for (uint32_t i = 0; i < iters; ++i) { + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); + clock_gettime(clock_id, &tm_vec[i]); + } + + const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); + const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); + const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps); + + timestamp_t ts_accum = 0; + timestamp_t tm_accum = 0; + for (uint32_t i = 0; i < iters; ++i) { + ts_accum += (ts_vec[i] - ts_base); + tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); + } + + *timestamp_v = (ts_accum / iters) + ts_base + error; + *time_v = (tm_accum / iters) + 
tm_base; + *error_v = error; + } + private: // Timestamp frequency factor freq_t sysclock_factor_; @@ -293,7 +360,7 @@ class HsaRsrcFactory { uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); // Wait signal - void SignalWait(const hsa_signal_t& signal) const; + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; // Wait signal with signal value restore void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; @@ -322,6 +389,11 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet); static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); + // Enable executables loading tracking + static bool IsExecutableTracking() { return executable_tracking_on_; } + static void EnableExecutableTracking(HsaApiTable* table); + static const char* GetKernelName(uint64_t addr); + // Initialize HSA API table void static InitHsaApiTable(HsaApiTable* table); static const hsa_pfn_t* HsaApi() { return &hsa_api_; } @@ -346,6 +418,21 @@ class HsaRsrcFactory { if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); } + void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) { + timestamp_t timestamp_v = 0; + timestamp_t time_v = 0; + timestamp_t error_v = 0; + timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v); + time_shift_[time_id] = time_v - timestamp_v; + time_error_[time_id] = error_v; + } + + hsa_status_t GetTime(uint32_t time_id, uint64_t value, uint64_t* time) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *time = value + time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + private: // System agents iterating callback static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); @@ -386,6 +473,13 @@ class HsaRsrcFactory { // System agents map std::map agent_map_; + // Executables loading tracking + typedef std::map 
symbols_map_t; + static symbols_map_t* symbols_map_; + static bool executable_tracking_on_; + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + // HSA runtime API table static hsa_pfn_t hsa_api_; @@ -403,6 +497,10 @@ class HsaRsrcFactory { // HSA timer HsaTimer* timer_; + // Time shift array to support time conversion + timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; + timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; + // CPU/kern-arg memory pools hsa_amd_memory_pool_t *cpu_pool_; hsa_amd_memory_pool_t *kern_arg_pool_; From b43eef92be15b91c8dbeb3f884cb90b6778c8517 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 22:44:41 -0600 Subject: [PATCH 83/94] hsa_rsrc_factory sync test --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 38750ba5a9..8cff137c25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -54,7 +54,7 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! 
-e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout a3aabb5; fi" ) +execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout fff0102; fi" ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa ) From e9087dcd8601613af1dfca8aba8cf4260a1ba419 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 23:39:08 -0600 Subject: [PATCH 84/94] saving time for roctx records --- src/util/hsa_rsrc_factory.h | 14 ++++++++++---- test/CMakeLists.txt | 2 +- test/tool/tracer_tool.cpp | 11 +++++++---- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index c52715d4e6..466ccf1f95 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -199,7 +199,7 @@ class HsaTimer { } // Method for timespec/ns conversion - timestamp_t timespec_to_ns(const timespec& time) const { + static timestamp_t timespec_to_ns(const timespec& time) { return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; } @@ -212,7 +212,7 @@ class HsaTimer { } // Return time in 'ns' - timestamp_t clocktime_ns(clockid_t clock_id) const { + static timestamp_t clocktime_ns(clockid_t clock_id) { timespec time; clock_gettime(clock_id, &time); return timespec_to_ns(time); @@ -221,7 +221,7 @@ class HsaTimer { // Return pair of correlated values of profiling timestamp and time with // correlation error for a given time ID and number of iterations void correlated_pair_ns(time_id_t time_id, uint32_t iters, - timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) { + timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const { clockid_t clock_id = 0; switch (clock_id) { case TIME_ID_CLOCK_REALTIME: @@ -427,12 +427,18 
@@ class HsaRsrcFactory { time_error_[time_id] = error_v; } - hsa_status_t GetTime(uint32_t time_id, uint64_t value, uint64_t* time) { + hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) { if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; *time = value + time_shift_[time_id]; return HSA_STATUS_SUCCESS; } + hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *timestamp = value - time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + private: // System agents iterating callback static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8cff137c25..03a8695f15 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -54,7 +54,7 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout fff0102; fi" ) +execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout 777c308; fi" ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa ) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index ba1f117bf1..33b8210914 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -43,6 +43,8 @@ THE SOFTWARE. 
#include #include +#include "util/hsa_rsrc_factory.h" + #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) @@ -164,7 +166,7 @@ struct roctx_trace_entry_t { uint32_t valid; uint32_t type; uint32_t cid; - timestamp_t timestamp; + timestamp_t time; uint32_t pid; uint32_t tid; const char* message; @@ -181,12 +183,11 @@ static inline void roctx_callback_fun( uint32_t tid, const char* message) { - const timestamp_t timestamp = timer->timestamp_fn_ns(); roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); entry->valid = roctracer::TRACE_ENTRY_COMPL; entry->type = 0; entry->cid = cid; - entry->timestamp = timestamp; + entry->time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC); entry->pid = GetPid(); entry->tid = tid; entry->message = (message != NULL) ? strdup(message) : NULL; @@ -215,8 +216,10 @@ void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roct // rocTX buffer flush function void roctx_flush_cb(roctx_trace_entry_t* entry) { + timestamp_t timestamp = 0; + HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, &timestamp); std::ostringstream os; - os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; + os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; if (entry->message != NULL) os << ":\"" << entry->message << "\""; else os << ":\"\""; fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle); From f22bf1e97250ddb653d9a9668bf0a94d78c04a77 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 29 Jan 2020 23:46:38 -0600 Subject: [PATCH 85/94] constructor partial initialization --- test/tool/tracer_tool.cpp | 162 +++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 73 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 33b8210914..14373f93e0 100644 --- 
a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -77,6 +77,9 @@ bool trace_hsa_activity = false; bool trace_hip_api = false; bool trace_hip_activity = false; bool trace_kfd = false; +// API trace vector +std::vector hsa_api_vec; +std::vector kfd_api_vec; LOADER_INSTANTIATE(); TRACE_BUFFER_INSTANTIATE(); @@ -585,14 +588,17 @@ void tool_unload() { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); fclose(kfd_api_file_handle); } + ONLOAD_TRACE_END(); } -// HSA-runtime tool on-load method -extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - ONLOAD_TRACE_BEG(); - timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); +// tool load method +void tool_load() { + static bool is_loaded = false; + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); + + if (is_loaded == true) return; + is_loaded = true; // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); @@ -635,10 +641,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } - // API trace vector - std::vector hsa_api_vec; - std::vector kfd_api_vec; - printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); // XML input @@ -710,67 +712,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); } - // Enable HSA API callbacks/activity - if (trace_hsa_api) { - hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); - - // initialize HSA tracing - roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table); - - fprintf(stdout, " HSA-trace("); fflush(stdout); - if (hsa_api_vec.size() != 0) { - for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { - uint32_t cid = HSA_API_ID_NUMBER; - const char* api = hsa_api_vec[i].c_str(); - 
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL)); - ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); - printf(" %s", api); - } - } else { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL)); - } - printf(")\n"); - } - - // Enable HSA GPU activity - if (trace_hsa_activity) { - hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); - - // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties { - table, - reinterpret_cast(hsa_activity_callback), - NULL, - output_prefix - }; - roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); - - fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); - } - - // Enable HIP API callbacks/activity - if (trace_hip_api || trace_hip_activity) { - hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); - hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); - - fprintf(stdout, " HIP-trace()\n"); fflush(stdout); - // roctracer properties - roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); - // Allocating tracing pool - roctracer_properties_t properties{}; - properties.buffer_size = 0x80000; - properties.buffer_callback_fun = hcc_activity_callback; - ROCTRACER_CALL(roctracer_open_pool(&properties)); - if (trace_hip_api) { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); - } - if (trace_hip_activity) { - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - } - } - const char* ctrl_str = getenv("ROCP_CTRL_RATE"); if (ctrl_str != NULL) { uint32_t ctrl_delay = 0; @@ -839,9 +780,82 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t 
runtime_version, } roctracer::TraceBufferBase::StartWorkerThreadAll(); - const bool ret = roctracer_load(); + roctracer_load(); + ONLOAD_TRACE_END(); - return ret; +} + +// HSA-runtime tool on-load method +extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { + ONLOAD_TRACE_BEG(); + timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); + + const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); + + // Enable HSA API callbacks/activity + if (trace_hsa_api) { + hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); + + // initialize HSA tracing + roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table); + + fprintf(stdout, " HSA-trace("); fflush(stdout); + if (hsa_api_vec.size() != 0) { + for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { + uint32_t cid = HSA_API_ID_NUMBER; + const char* api = hsa_api_vec[i].c_str(); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL)); + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); + printf(" %s", api); + } + } else { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL)); + } + printf(")\n"); + } + + // Enable HSA GPU activity + if (trace_hsa_activity) { + hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); + + // initialize HSA tracing + roctracer::hsa_ops_properties_t ops_properties { + table, + reinterpret_cast(hsa_activity_callback), + NULL, + output_prefix + }; + roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); + + fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + } + + // Enable HIP API callbacks/activity + if (trace_hip_api || trace_hip_activity) { + hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); 
+ hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); + + fprintf(stdout, " HIP-trace()\n"); fflush(stdout); + // roctracer properties + roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 0x80000; + properties.buffer_callback_fun = hcc_activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + if (trace_hip_api) { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + } + if (trace_hip_activity) { + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + } + } + + ONLOAD_TRACE_END(); + return true; } // HSA-runtime on-unload method @@ -850,7 +864,9 @@ extern "C" PUBLIC_API void OnUnload() { } extern "C" CONSTRUCTOR_API void constructor() { - ONLOAD_TRACE("") + ONLOAD_TRACE_BEG(); + tool_load(); + ONLOAD_TRACE_END(); } extern "C" DESTRUCTOR_API void destructor() { ONLOAD_TRACE_BEG(); From eb91d35df2d0204d314dd231dce1809d5ebcfaee Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 13 Feb 2020 21:03:14 -0600 Subject: [PATCH 86/94] roctx condition for timestamp or clocktime --- test/tool/tracer_tool.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 14373f93e0..b0850d0d2c 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -186,11 +186,16 @@ static inline void roctx_callback_fun( uint32_t tid, const char* message) { +#if ROCTX_CLOCK_TIME + const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC); +#else + const timestamp_t time = timer->timestamp_fn_ns(); +#endif roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); entry->valid = roctracer::TRACE_ENTRY_COMPL; entry->type = 0; entry->cid = cid; - entry->time = 
HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC); + entry->time = time; entry->pid = GetPid(); entry->tid = tid; entry->message = (message != NULL) ? strdup(message) : NULL; @@ -219,8 +224,12 @@ void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roct // rocTX buffer flush function void roctx_flush_cb(roctx_trace_entry_t* entry) { +#if ROCTX_CLOCK_TIME timestamp_t timestamp = 0; HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, &timestamp); +#else + const timestamp_t timestamp = entry->time; +#endif std::ostringstream os; os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; if (entry->message != NULL) os << ":\"" << entry->message << "\""; From b4f2aa1a1c7bd85166b4e00402296aa2a1d19448 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 27 Feb 2020 13:39:56 -0600 Subject: [PATCH 87/94] hcc_ops_trace.txt on request --- test/tool/tracer_tool.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index b0850d0d2c..8c4b5a16d7 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -97,12 +97,12 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler void fatal(const std::string msg) { - fflush(roctx_file_handle); - fflush(hsa_api_file_handle); - fflush(hsa_async_copy_file_handle); - fflush(hip_api_file_handle); - fflush(hcc_activity_file_handle); - fflush(kfd_api_file_handle); + if (roctx_file_handle) fflush(roctx_file_handle); + if (hsa_api_file_handle) fflush(hsa_api_file_handle); + if (hsa_async_copy_file_handle) fflush(hsa_async_copy_file_handle); + if (hip_api_file_handle) fflush(hip_api_file_handle); + if (hcc_activity_file_handle) fflush(hcc_activity_file_handle); + if (kfd_api_file_handle) fflush(kfd_api_file_handle); fflush(stdout); fprintf(stderr, "%s\n\n", msg.c_str()); fflush(stderr); @@ -589,8 +589,8 @@ void tool_unload() { 
ROCTRACER_CALL(roctracer_close_pool()); hip_api_trace_buffer.Flush(); - close_output_file(hip_api_file_handle); - close_output_file(hcc_activity_file_handle); + if (hip_api_file_handle) close_output_file(hip_api_file_handle); + if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle); } if (trace_kfd) { @@ -843,8 +843,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { - hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); - hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); fprintf(stdout, " HIP-trace()\n"); fflush(stdout); // roctracer properties @@ -855,10 +853,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, properties.buffer_callback_fun = hcc_activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); if (trace_hip_api) { + hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); } if (trace_hip_activity) { + hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); } } From 8b63135cedd01c3b2d0926b2161300c8f825aa56 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 23 Jan 2020 20:55:49 -0600 Subject: [PATCH 88/94] PC sampling initial bringup --- inc/ext/prof_protocol.h | 5 ++ inc/roctracer.h | 1 + inc/roctracer_hsa.h | 9 +++ script/hsaap.py | 4 +- src/core/loader.h | 30 +++++++ src/core/roctracer.cpp | 78 ++++++++++++------- test/MatrixTranspose_test/Makefile | 2 +- test/MatrixTranspose_test/MatrixTranspose.cpp | 8 ++ 8 files changed, 107 insertions(+), 30 deletions(-) diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 
c578df0fd4..b6384b42ee 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -80,6 +80,11 @@ typedef struct activity_record_s { struct { activity_correlation_id_t external_id; // external correlatino id }; + struct { + uint32_t se; // sampled SE + uint64_t cycle; // sample cycle + uint64_t pc; // sample PC + } ps_sample; }; size_t bytes; // data size bytes } activity_record_t; diff --git a/inc/roctracer.h b/inc/roctracer.h index deffb0f6d3..8b0f2114f9 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -67,6 +67,7 @@ typedef enum { ROCTRACER_STATUS_BAD_PARAMETER = 5, ROCTRACER_STATUS_HIP_API_ERR = 6, ROCTRACER_STATUS_HCC_OPS_ERR = 7, + ROCTRACER_STATUS_HSA_ERR = 7, ROCTRACER_STATUS_ROCTX_ERR = 8, } roctracer_status_t; diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index 8531ab51dc..625fa0b760 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -28,6 +28,15 @@ THE SOFTWARE. #include "roctracer.h" +// HSA OP ID enumeration +enum hsa_op_id_t { + HSA_OP_ID_DISPATCH = 0, + HSA_OP_ID_COPY = 1, + HSA_OP_ID_BARRIER = 2, + HSA_OP_ID_PCSAMPLE = 3, + HSA_OP_ID_NUMBER = 4 +}; + #ifdef __cplusplus #include #include diff --git a/script/hsaap.py b/script/hsaap.py index 1e413a5295..f07c43d902 100755 --- a/script/hsaap.py +++ b/script/hsaap.py @@ -342,8 +342,8 @@ class API_DescrParser: self.content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n' else: self.content += '\n' - self.content += ' HSA_API_ID_NUMBER = ' + str(n) + ',\n' - self.content += ' HSA_API_ID_ANY = ' + str(n + 1) + ',\n' + self.content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n' + self.content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n' self.content += '};\n' # generate API args structure diff --git a/src/core/loader.h b/src/core/loader.h index 43b07be905..27a6fda55b 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -70,6 +70,33 @@ class BaseLoader : public T { void* handle_; }; +// 'rocprofiler' library loader class +class RocpApi { + public: + typedef 
BaseLoader Loader; + + typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg); + typedef bool (OperateCallback_t)(uint32_t op); + typedef bool (InitCallback_t)(void* callback, void* arg); + typedef bool (EnableCallback_t)(uint32_t op, bool enable); + typedef const char* (NameCallback_t)(uint32_t op); + + RegisterCallback_t* RegisterApiCallback; + OperateCallback_t* RemoveApiCallback; + InitCallback_t* InitActivityCallback; + EnableCallback_t* EnableActivityCallback; + NameCallback_t* GetOpName; + + protected: + void init(Loader* loader) { + RegisterApiCallback = loader->GetFun("RegisterApiCallback"); + RemoveApiCallback = loader->GetFun("RemoveApiCallback"); + InitActivityCallback = loader->GetFun("InitActivityCallback"); + EnableActivityCallback = loader->GetFun("EnableActivityCallback"); + GetOpName = loader->GetFun("GetOpName"); + } +}; + // HIP runtime library loader class class HipApi { public: @@ -164,6 +191,7 @@ class RocTxApi { } }; +typedef BaseLoader RocpLoader; typedef BaseLoader HipLoader; typedef BaseLoader HccLoader; typedef BaseLoader KfdLoader; @@ -176,6 +204,8 @@ typedef BaseLoader RocTxLoader; template std::atomic*> roctracer::BaseLoader::instance_{}; \ template bool roctracer::BaseLoader::to_load_ = false; \ template bool roctracer::BaseLoader::to_check_ = true; \ + template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ + template<> bool roctracer::RocpLoader::to_load_ = true; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ template<> bool roctracer::HipLoader::to_check_ = false; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index b975117ae6..9b31c64055 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -394,8 +394,6 @@ void HCC_ActivityIdCallback(activity_correlation_id_t correlation_id) { } void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) 
{ - static hsa_rt_utils::Timer timer; - MemoryPool* pool = reinterpret_cast(arg); roctracer_record_t* record_ptr = reinterpret_cast(record); record_ptr->domain = ACTIVITY_DOMAIN_HCC_OPS; @@ -526,6 +524,13 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( return status; } +void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { + MemoryPool* pool = reinterpret_cast(arg); + roctracer_record_t* record_ptr = reinterpret_cast(record); + record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS; + pool->Write(*record_ptr); +} + // Logger routines and primitives util::Logger::mutex_t util::Logger::mutex_; std::atomic util::Logger::instance_{}; @@ -573,22 +578,16 @@ PUBLIC_API const char* roctracer_op_string( { API_METHOD_PREFIX switch (domain) { - case ACTIVITY_DOMAIN_HSA_API: { + case ACTIVITY_DOMAIN_HSA_API: return roctracer::hsa_support::GetApiName(op); - break; - } - case ACTIVITY_DOMAIN_HCC_OPS: { + case ACTIVITY_DOMAIN_HSA_OPS: + return roctracer::RocpLoader::Instance().GetOpName(op); + case ACTIVITY_DOMAIN_HCC_OPS: return roctracer::HccLoader::Instance().GetOpName(kind); - break; - } - case ACTIVITY_DOMAIN_HIP_API: { + case ACTIVITY_DOMAIN_HIP_API: return roctracer::HipLoader::Instance().ApiName(op); - break; - } - case ACTIVITY_DOMAIN_KFD_API: { + case ACTIVITY_DOMAIN_KFD_API: return roctracer::kfd_support::GetApiName(op); - break; - } default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -622,7 +621,7 @@ PUBLIC_API roctracer_status_t roctracer_op_code( static inline uint32_t get_op_num(const uint32_t& domain) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: return 1; + case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; @@ -645,11 +644,16 @@ static roctracer_status_t roctracer_enable_callback_fun( switch (domain) { case 
ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); + if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error(" << op << ") failed"); break; } case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { + if (op == HSA_API_ID_DISPATCH) { + const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed"); + break; + } roctracer::hsa_support::cb_table.set(op, callback, user_data); break; } @@ -658,13 +662,13 @@ static roctracer_status_t roctracer_enable_callback_fun( if (roctracer::HipLoader::Instance().Enabled() == false) break; hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRegisterApiCallback(" << op << ") failed"); + if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed"); } break; } @@ -730,19 +734,26 @@ static roctracer_status_t roctracer_disable_callback_fun( break; } case ACTIVITY_DOMAIN_HSA_OPS: break; - case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_API: { + if (op == HSA_API_ID_DISPATCH) { + const bool succ = 
roctracer::RocpLoader::Instance().RemoveApiCallback(op); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveActivityCallback error(" << op << ") failed"); + break; + } + break; + } case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { if (roctracer::HipLoader::Instance().Enabled() == false) break; hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRemoveApiCallback(" << op << ") failed"); + if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed"); } break; } @@ -833,8 +844,17 @@ static roctracer_status_t roctracer_enable_activity_fun( if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - roctracer::hsa_support::async_copy_callback_enabled = true; - rocprofiler::InterceptQueue::Enable(true); + if (op == HSA_OP_ID_DISPATCH) { + const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); + if (init_phase == true) { + roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, + (void*)pool); + } + const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); + } else if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } break; } case ACTIVITY_DOMAIN_HSA_API: break; @@ -921,8 +941,12 @@ static 
roctracer_status_t roctracer_disable_activity_fun( { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - roctracer::hsa_support::async_copy_callback_enabled = false; - rocprofiler::InterceptQueue::Enable(false); + if (op == HSA_OP_ID_DISPATCH) { + const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); + } else if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } break; } case ACTIVITY_DOMAIN_HSA_API: break; @@ -931,14 +955,14 @@ static roctracer_status_t roctracer_disable_activity_fun( if (roctracer::HccLoader::Instance().Enabled() == false) break; const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")"); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")"); break; } case ACTIVITY_DOMAIN_HIP_API: { if (roctracer::HipLoader::Instance().Enabled() == false) break; const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: break; diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index d25f64340b..571725fd1d 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -41,7 +41,7 @@ $(EXECUTABLE): $(OBJECTS) $(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS) test: $(EXECUTABLE) - LD_PRELOAD=$(LIB_PATH)/libkfdwrapper64.so 
$(EXECUTABLE) + LD_PRELOAD="$(LIB_PATH)/libkfdwrapper64.so librocprofiler64.so" $(EXECUTABLE) clean: rm -f $(EXECUTABLE) diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index cc261f4312..3877a4dead 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -310,6 +310,12 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->queue_id ); if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + } else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) { + fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)", + record->ps_sample.se, + record->ps_sample.cycle, + record->ps_sample.pc + ); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { fprintf(stdout, " external_id(%lu)", record->external_id @@ -340,6 +346,8 @@ void init_tracing() { // Enable HIP activity tracing ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + // Enable PC sampling + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE)); // Enable KFD API tracing ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); From d2243ef1ecca377c7c08c2af34e8ae0814874a05 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 7 Mar 2020 19:09:54 -0600 Subject: [PATCH 89/94] pc sampling integration fix --- inc/ext/prof_protocol.h | 2 +- src/core/roctracer.cpp | 16 +++--- src/core/trace_buffer.h | 4 ++ test/tool/tracer_tool.cpp | 112 +++++++++++++++++++++++++------------- 4 files changed, 86 insertions(+), 48 deletions(-) diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index b6384b42ee..6a13724e95 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -84,7 +84,7 @@ typedef struct 
activity_record_s { uint32_t se; // sampled SE uint64_t cycle; // sample cycle uint64_t pc; // sample PC - } ps_sample; + } pc_sample; }; size_t bytes; // data size bytes } activity_record_t; diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 9b31c64055..846648dd45 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -844,7 +844,9 @@ static roctracer_status_t roctracer_enable_activity_fun( if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - if (op == HSA_OP_ID_DISPATCH) { + if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } else { const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); if (init_phase == true) { roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, @@ -852,8 +854,6 @@ static roctracer_status_t roctracer_enable_activity_fun( } const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); - } else if (op == HSA_OP_ID_COPY) { - roctracer::hsa_support::async_copy_callback_enabled = true; } break; } @@ -941,11 +941,11 @@ static roctracer_status_t roctracer_disable_activity_fun( { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - if (op == HSA_OP_ID_DISPATCH) { + if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } else { const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); - } else if (op == HSA_OP_ID_COPY) { - roctracer::hsa_support::async_copy_callback_enabled = true; } break; } @@ -976,8 +976,8 @@ static void roctracer_disable_activity_impl( uint32_t domain, uint32_t op) { - roctracer::act_journal->remove({domain, op, {}}); - 
roctracer_disable_activity_fun((roctracer_domain_t)domain, op); + roctracer::act_journal->remove({domain, op, {}}); + roctracer_disable_activity_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_activity( diff --git a/src/core/trace_buffer.h b/src/core/trace_buffer.h index 05bc3c47b6..fc778bb95e 100644 --- a/src/core/trace_buffer.h +++ b/src/core/trace_buffer.h @@ -184,8 +184,12 @@ class TraceBuffer : protected TraceBufferBase { if (is_flushed == false) { for (flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) { + // Flushed entries type uint32_t type = prm->type; + // Flushing function callback_t fun = prm->fun; + if (fun == NULL) FATAL("flush function is not set"); + pointer_t pointer = 0; for (Entry* ptr : buf_list_) { Entry* end = ptr + size_; diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 8c4b5a16d7..9ce34d549e 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -77,6 +77,7 @@ bool trace_hsa_activity = false; bool trace_hip_api = false; bool trace_hip_activity = false; bool trace_kfd = false; +bool trace_pcs = false; // API trace vector std::vector hsa_api_vec; std::vector kfd_api_vec; @@ -91,18 +92,25 @@ FILE* hsa_async_copy_file_handle = NULL; FILE* hip_api_file_handle = NULL; FILE* hcc_activity_file_handle = NULL; FILE* kfd_api_file_handle = NULL; +FILE* pc_sample_file_handle = NULL; + +void close_output_file(FILE* file_handle); +void close_file_handles() { + if (roctx_file_handle) close_output_file(roctx_file_handle); + if (hsa_api_file_handle) close_output_file(hsa_api_file_handle); + if (hsa_async_copy_file_handle) close_output_file(hsa_async_copy_file_handle); + if (hip_api_file_handle) close_output_file(hip_api_file_handle); + if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle); + if (kfd_api_file_handle) close_output_file(kfd_api_file_handle); + if (pc_sample_file_handle) 
close_output_file(pc_sample_file_handle); +} static inline uint32_t GetPid() { return syscall(__NR_getpid); } static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler void fatal(const std::string msg) { - if (roctx_file_handle) fflush(roctx_file_handle); - if (hsa_api_file_handle) fflush(hsa_api_file_handle); - if (hsa_async_copy_file_handle) fflush(hsa_async_copy_file_handle); - if (hip_api_file_handle) fflush(hip_api_file_handle); - if (hcc_activity_file_handle) fflush(hcc_activity_file_handle); - if (kfd_api_file_handle) fflush(kfd_api_file_handle); + close_file_handles(); fflush(stdout); fprintf(stderr, "%s\n\n", msg.c_str()); fflush(stderr); @@ -439,21 +447,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { // Activity tracing callback // hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) -void hcc_activity_callback(const char* begin, const char* end, void* arg) { +void pool_activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* record = reinterpret_cast(begin); const roctracer_record_t* end_record = reinterpret_cast(end); while (record < end_record) { const char * name = roctracer_op_string(record->domain, record->op, record->kind); - if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n", - record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id); - fflush(hcc_activity_file_handle); - } else { -#if 0 - fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n", - record->begin_ns, record->end_ns, record->process_id, record->thread_id, name); -#endif + switch(record->domain) { + case ACTIVITY_DOMAIN_HCC_OPS: + fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n", + record->begin_ns, record->end_ns, + record->device_id, record->queue_id, + name, record->correlation_id); + fflush(hcc_activity_file_handle); + break; + case ACTIVITY_DOMAIN_HSA_OPS: + if 
(record->op == HSA_OP_ID_PCSAMPLE) { + fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", + record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name); + fflush(pc_sample_file_handle); + } + break; } ROCTRACER_CALL(roctracer_next_record(record, &record)); } @@ -551,7 +565,28 @@ FILE* open_output_file(const char* prefix, const char* name) { } void close_output_file(FILE* file_handle) { - if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle); + if (file_handle != NULL) { + fflush(file_handle); + if (file_handle != stdout) fclose(file_handle); + } +} + +// Allocating tracing pool +void open_tracing_pool() { + if (roctracer_default_pool() == NULL) { + roctracer_properties_t properties{}; + properties.buffer_size = 0x80000; + properties.buffer_callback_fun = pool_activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + } +} + +// Flush tracing pool +void close_tracing_pool() { + if (roctracer_default_pool() != NULL) { + ROCTRACER_CALL(roctracer_flush_activity()); + ROCTRACER_CALL(roctracer_close_pool()); + } } // tool unload method @@ -566,38 +601,27 @@ void tool_unload() { if (trace_roctx) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); - - roctx_trace_buffer.Flush(); - close_output_file(roctx_file_handle); } if (trace_hsa_api) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); - - hsa_api_trace_buffer.Flush(); - close_output_file(hsa_api_file_handle); } - if (trace_hsa_activity) { + if (trace_hsa_activity || trace_pcs) { ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); - - close_output_file(hsa_async_copy_file_handle); } if (trace_hip_api || trace_hip_activity) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_flush_activity()); - 
ROCTRACER_CALL(roctracer_close_pool()); - - hip_api_trace_buffer.Flush(); - if (hip_api_file_handle) close_output_file(hip_api_file_handle); - if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle); } - if (trace_kfd) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); - fclose(kfd_api_file_handle); } + // Flush tracing pool + close_tracing_pool(); + roctracer::TraceBufferBase::FlushAll(); + close_file_handles(); + ONLOAD_TRACE_END(); } @@ -648,6 +672,11 @@ void tool_load() { if (std::string(trace_domain).find("kfd") != std::string::npos) { trace_kfd = true; } + + // PC sampling enabling + if (std::string(trace_domain).find("pcs") != std::string::npos) { + trace_pcs = true; + } } printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); @@ -843,15 +872,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { - fprintf(stdout, " HIP-trace()\n"); fflush(stdout); // roctracer properties roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); // Allocating tracing pool - roctracer_properties_t properties{}; - properties.buffer_size = 0x80000; - properties.buffer_callback_fun = hcc_activity_callback; - ROCTRACER_CALL(roctracer_open_pool(&properties)); + open_tracing_pool(); + // Enable tracing if (trace_hip_api) { hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); @@ -863,6 +889,14 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } + // Enable PC sampling + if (trace_pcs) { + fprintf(stdout, " PCS-trace()\n"); fflush(stdout); + open_tracing_pool(); + pc_sample_file_handle = open_output_file(output_prefix, "pc_sample_trace.txt"); + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE)); + } + 
ONLOAD_TRACE_END(); return true; } From c1455827b7f055eed974174787b31993a16b4240 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 7 Mar 2020 20:35:24 -0600 Subject: [PATCH 90/94] activity entry struct fix --- inc/ext/prof_protocol.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 6a13724e95..c29ff0e65a 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -65,9 +65,18 @@ typedef struct activity_record_s { uint32_t domain; // activity domain id activity_kind_t kind; // activity kind activity_op_t op; // activity op - activity_correlation_id_t correlation_id; // activity ID - uint64_t begin_ns; // host begin timestamp - uint64_t end_ns; // host end timestamp + union { + struct { + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp + }; + struct { + uint32_t se; // sampled SE + uint64_t cycle; // sample cycle + uint64_t pc; // sample PC + } pc_sample; + }; union { struct { int device_id; // device id @@ -80,11 +89,6 @@ typedef struct activity_record_s { struct { activity_correlation_id_t external_id; // external correlatino id }; - struct { - uint32_t se; // sampled SE - uint64_t cycle; // sample cycle - uint64_t pc; // sample PC - } pc_sample; }; size_t bytes; // data size bytes } activity_record_t; From 7965a02680717f5ceb77a9ab0047c7195341f825 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 9 Mar 2020 21:13:44 -0500 Subject: [PATCH 91/94] fixing rocprofiler activity enabling --- src/core/loader.h | 19 ++++++++++--------- src/core/roctracer.cpp | 13 +++++++++++++ test/MatrixTranspose_test/MatrixTranspose.cpp | 11 ++++++----- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/core/loader.h b/src/core/loader.h index 27a6fda55b..aa292418c4 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -21,11 +21,10 @@ class BaseLoader : public T { if 
(handle_ == NULL) return NULL; fun_t *f = (fun_t*) dlsym(handle_, fun_name); - if (f == NULL) { + if ((to_check_symb_ == true) && (f == NULL)) { fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror()); abort(); } - dlerror(); return f; } @@ -48,11 +47,10 @@ class BaseLoader : public T { BaseLoader() { const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); - if ((to_check_ == true) && (handle_ == NULL)) { + if ((to_check_open_ == true) && (handle_ == NULL)) { fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); abort(); } - dlerror(); T::init(this); } @@ -62,7 +60,8 @@ class BaseLoader : public T { } static bool to_load_; - static bool to_check_; + static bool to_check_open_; + static bool to_check_symb_; static mutex_t mutex_; static const char* lib_name_; @@ -203,13 +202,15 @@ typedef BaseLoader RocTxLoader; template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ template bool roctracer::BaseLoader::to_load_ = false; \ - template bool roctracer::BaseLoader::to_check_ = true; \ + template bool roctracer::BaseLoader::to_check_open_ = true; \ + template bool roctracer::BaseLoader::to_check_symb_ = true; \ template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ - template<> bool roctracer::RocpLoader::to_load_ = true; \ + template<> bool roctracer::RocpLoader::to_check_open_ = false; \ + template<> bool roctracer::RocpLoader::to_check_symb_ = false; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ - template<> bool roctracer::HipLoader::to_check_ = false; \ + template<> bool roctracer::HipLoader::to_check_open_ = false; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ - template<> bool roctracer::HccLoader::to_check_ = false; \ + template<> bool roctracer::HccLoader::to_check_open_ = false; 
\ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ template<> bool roctracer::RocTxLoader::to_load_ = true; diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 846648dd45..37aa0dc9eb 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -649,11 +649,13 @@ static roctracer_status_t roctracer_enable_callback_fun( } case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { +#if 0 if (op == HSA_API_ID_DISPATCH) { const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed"); break; } +#endif roctracer::hsa_support::cb_table.set(op, callback, user_data); break; } @@ -735,11 +737,14 @@ static roctracer_status_t roctracer_disable_callback_fun( } case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { +#if 0 if (op == HSA_API_ID_DISPATCH) { const bool succ = roctracer::RocpLoader::Instance().RemoveApiCallback(op); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveActivityCallback error(" << op << ") failed"); break; } +#endif + roctracer::hsa_support::cb_table.set(op, NULL, NULL); break; } case ACTIVITY_DOMAIN_HCC_OPS: break; @@ -848,10 +853,14 @@ static roctracer_status_t roctracer_enable_activity_fun( roctracer::hsa_support::async_copy_callback_enabled = true; } else { const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); + if (roctracer::RocpLoader::Instance().InitActivityCallback == NULL) break; if (init_phase == true) { roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, (void*)pool); } + if (roctracer::RocpLoader::Instance().EnableActivityCallback == NULL) { + EXC_RAISING(ROCTRACER_STATUS_ERROR, "EnableActivityCallback not found"); + } const bool succ = 
roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); } @@ -944,6 +953,10 @@ static roctracer_status_t roctracer_disable_activity_fun( if (op == HSA_OP_ID_COPY) { roctracer::hsa_support::async_copy_callback_enabled = true; } else { + if (roctracer::RocpLoader::Instance().InitActivityCallback == NULL) break; + if (roctracer::RocpLoader::Instance().EnableActivityCallback == NULL) { + EXC_RAISING(ROCTRACER_STATUS_ERROR, "EnableActivityCallback not found"); + } const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); } diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 3877a4dead..14e3b847c0 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -312,9 +312,9 @@ void activity_callback(const char* begin, const char* end, void* arg) { if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); } else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) { fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)", - record->ps_sample.se, - record->ps_sample.cycle, - record->ps_sample.pc + record->pc_sample.se, + record->pc_sample.cycle, + record->pc_sample.pc ); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { fprintf(stdout, " external_id(%lu)", @@ -350,7 +350,6 @@ void init_tracing() { ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE)); // Enable KFD API tracing ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); // Enable rocTX ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL)); } 
@@ -368,7 +367,9 @@ void stop_tracing() { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); ROCTRACER_CALL(roctracer_flush_activity()); printf("# STOP #############################\n"); } From 3568ca4dbb91418872ee7c5733f891647fc523e6 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 11 Mar 2020 11:33:27 -0500 Subject: [PATCH 92/94] compatibility with pre-pcs rocprofiler version --- src/core/loader.h | 2 -- src/core/roctracer.cpp | 9 +-------- test/tool/tracer_tool.cpp | 2 +- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/core/loader.h b/src/core/loader.h index aa292418c4..dd30dc72dd 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -205,8 +205,6 @@ typedef BaseLoader RocTxLoader; template bool roctracer::BaseLoader::to_check_open_ = true; \ template bool roctracer::BaseLoader::to_check_symb_ = true; \ template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ - template<> bool roctracer::RocpLoader::to_check_open_ = false; \ - template<> bool roctracer::RocpLoader::to_check_symb_ = false; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ template<> bool roctracer::HipLoader::to_check_open_ = false; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 37aa0dc9eb..e03bb0b176 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -853,14 +853,10 @@ static roctracer_status_t roctracer_enable_activity_fun( 
roctracer::hsa_support::async_copy_callback_enabled = true; } else { const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); - if (roctracer::RocpLoader::Instance().InitActivityCallback == NULL) break; if (init_phase == true) { roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, (void*)pool); } - if (roctracer::RocpLoader::Instance().EnableActivityCallback == NULL) { - EXC_RAISING(ROCTRACER_STATUS_ERROR, "EnableActivityCallback not found"); - } const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); } @@ -953,10 +949,7 @@ static roctracer_status_t roctracer_disable_activity_fun( if (op == HSA_OP_ID_COPY) { roctracer::hsa_support::async_copy_callback_enabled = true; } else { - if (roctracer::RocpLoader::Instance().InitActivityCallback == NULL) break; - if (roctracer::RocpLoader::Instance().EnableActivityCallback == NULL) { - EXC_RAISING(ROCTRACER_STATUS_ERROR, "EnableActivityCallback not found"); - } + if (roctracer::RocpLoader::GetRef() == NULL) break; const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); } diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 9ce34d549e..66a3856802 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -867,7 +867,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); } // Enable HIP API callbacks/activity From 
1be273a1b41740e534867f4e44d85b3651b51965 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Sat, 14 Mar 2020 00:13:22 -0500 Subject: [PATCH 93/94] enable tool reloading --- script/hsaap.py | 7 ++++--- script/kfdap.py | 9 ++++---- src/core/journal.h | 2 +- src/core/roctracer.cpp | 43 +++++++++++++++++++-------------------- test/tool/tracer_tool.cpp | 19 ++++++++--------- 5 files changed, 39 insertions(+), 41 deletions(-) diff --git a/script/hsaap.py b/script/hsaap.py index f07c43d902..07a365e2b4 100755 --- a/script/hsaap.py +++ b/script/hsaap.py @@ -1,4 +1,5 @@ #!/usr/bin/python +from __future__ import print_function import os, sys, re OUT='inc/hsa_prof_str.h' @@ -36,7 +37,7 @@ LICENSE = \ ############################################################# # Error handler def fatal(module, msg): - print >>sys.stderr, module + ' Error: "' + msg + '"' + print (module + ' Error: "' + msg + '"', file = sys.stderr) sys.exit(1) # Get next text block @@ -490,7 +491,7 @@ class API_DescrParser: # main # Usage if len(sys.argv) != 3: - print >>sys.stderr, "Usage:", sys.argv[0], " " + print ("Usage:", sys.argv[0], " ", file=sys.stderr) sys.exit(1) else: ROOT = sys.argv[1] + '/' @@ -499,7 +500,7 @@ else: descr = API_DescrParser(OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE) out_file = ROOT + OUT -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content[:-1]) f.close() diff --git a/script/kfdap.py b/script/kfdap.py index 06248d9f26..a9c6defb5f 100755 --- a/script/kfdap.py +++ b/script/kfdap.py @@ -1,4 +1,5 @@ #!/usr/bin/python +from __future__ import print_function import os, sys, re OUT_H = 'inc/kfd_prof_str.h' @@ -33,7 +34,7 @@ LICENSE = \ ############################################################# # Error handler def fatal(module, msg): - print >>sys.stderr, module + ' Error: "' + msg + '"' + print (module + ' Error: "' + msg + '"', file = sys.stderr) sys.exit(1) # Get next text block @@ -539,7 +540,7 @@ class 
API_DescrParser: # main # Usage if len(sys.argv) != 3: - print >>sys.stderr, "Usage:", sys.argv[0], " " + print ("Usage:", sys.argv[0], " ", file = sys.stderr) sys.exit(1) else: ROOT = sys.argv[1] + '/' @@ -548,13 +549,13 @@ else: descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE) out_file = ROOT + OUT_H -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content_h[:-1]) f.close() out_file = ROOT + OUT_CPP -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content_cpp[:-1]) f.close() diff --git a/src/core/journal.h b/src/core/journal.h index f4d8a676b7..68f1e67709 100644 --- a/src/core/journal.h +++ b/src/core/journal.h @@ -47,7 +47,7 @@ class Journal { } ~Journal() { - for (auto& val : map_) delete val.second; + for (auto& val : *map_) delete val.second; delete map_; } diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index e03bb0b176..daedb97db9 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -686,8 +686,8 @@ static void roctracer_enable_callback_impl( roctracer_rtapi_callback_t callback, void* user_data) { - roctracer::cb_journal->registr({domain, op, {callback, user_data}}); - roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); + roctracer::cb_journal->registr({domain, op, {callback, user_data}}); + roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); } PUBLIC_API roctracer_status_t roctracer_enable_op_callback( @@ -1152,46 +1152,45 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( API_METHOD_SUFFIX } +static bool is_loaded = false; + PUBLIC_API bool roctracer_load() { - static bool is_loaded = false; ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); - if (is_loaded) return true; + if (is_loaded == true) return true; is_loaded = true; + if (roctracer::cb_journal == NULL) roctracer::cb_journal = new 
roctracer::CbJournal; + if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; + ONLOAD_TRACE_END(); return true; } PUBLIC_API void roctracer_unload() { - static bool is_unloaded = false; - ONLOAD_TRACE("begin, unloaded(" << is_unloaded << ")"); + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); - if (is_unloaded == true) return; - is_unloaded = true; + if (is_loaded == false) return; + is_loaded = false; + + if (roctracer::cb_journal != NULL) { + delete roctracer::cb_journal; + roctracer::cb_journal = NULL; + } + if (roctracer::act_journal != NULL) { + delete roctracer::act_journal; + roctracer::act_journal = NULL; + } roctracer::trace_buffer.Flush(); roctracer::close_output_file(roctracer::kernel_file_handle); ONLOAD_TRACE_END(); } -// HSA-runtime tool on-load/unload methods -PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - ONLOAD_TRACE_BEG(); - const bool ret = roctracer_load(); - ONLOAD_TRACE_END(); - return ret; -} -PUBLIC_API void OnUnload() { - ONLOAD_TRACE("done"); -} - CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); - if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; - if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; + roctracer_load(); ONLOAD_TRACE_END(); } diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 66a3856802..b1114ecd75 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -585,19 +585,18 @@ void open_tracing_pool() { void close_tracing_pool() { if (roctracer_default_pool() != NULL) { ROCTRACER_CALL(roctracer_flush_activity()); - ROCTRACER_CALL(roctracer_close_pool()); } } +// tool library is loaded +static bool is_loaded = false; + // tool unload method void tool_unload() { - static bool is_unloaded = false; - ONLOAD_TRACE("begin, unloaded(" << is_unloaded << 
")"); + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); - if (is_unloaded == true) return; - is_unloaded = true; - - roctracer_unload(); + if (is_loaded == false) return; + is_loaded = false; if (trace_roctx) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); @@ -627,12 +626,13 @@ void tool_unload() { // tool load method void tool_load() { - static bool is_loaded = false; ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); if (is_loaded == true) return; is_loaded = true; + roctracer::TraceBufferBase::StartWorkerThreadAll(); + // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); if (output_prefix != NULL) { @@ -817,9 +817,6 @@ void tool_load() { printf(")\n"); } - roctracer::TraceBufferBase::StartWorkerThreadAll(); - roctracer_load(); - ONLOAD_TRACE_END(); } From cb89ed3325ee0025438108065b3bbe1d2de41c7d Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 23 Mar 2020 17:35:18 -0500 Subject: [PATCH 94/94] change cmake links to directories --- CMakeLists.txt | 79 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f9facb60ef..eb65300fd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,39 +80,55 @@ set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" ) set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) # If the library is a release, strip the target library -if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) - add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) -endif () +#if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) +# add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) +#endif () ## Build tests add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test ) -## Install and packaging -set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} ) -message ( "---------Install-Dir: 
${CMAKE_INSTALL_PREFIX}" ) +## public headers +set ( PUBLIC_HEADERS + roctracer.h + roctracer_hip.h + roctracer_hcc.h + roctracer_ext.h + ext/prof_protocol.h + ext/hsa_rt_utils.hpp + roctx.h +) -add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/include inc-link ) +## Installation and packaging +set ( DEST_NAME ${ROCPROFILER_NAME} ) +if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX ) + get_filename_component ( DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME ) + get_filename_component ( DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY ) + set ( CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR} ) +endif () +message ( "-----------Dest-name: ${DEST_NAME}" ) +message ( "------Install-prefix: ${CMAKE_INSTALL_PREFIX}" ) +message ( "-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}" ) + +#add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} +# COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/include inc-link ) add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link ) add_custom_target ( so-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link ) add_custom_target ( so-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link ) ## Install
information -install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hip.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hcc.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_ext.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext ) -install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} ) -install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so ) -install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} ) -install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} ) -install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool ) +install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib ) +foreach ( header ${PUBLIC_HEADERS} ) + install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION ${DEST_NAME}/include ) + install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION include/${DEST_NAME} ) +endforeach () +#install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION include RENAME ${DEST_NAME} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} ) +install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION ${DEST_NAME}/tool ) ## 
rocTX set ( ROCTX_TARGET "roctx64" ) @@ -123,20 +139,19 @@ set_property ( TARGET ${ROCTX_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" ) set_property ( TARGET ${ROCTX_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) add_custom_target ( so-roctx-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link ) add_custom_target ( so-roctx-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link ) -install ( TARGETS "roctx64" LIBRARY DESTINATION lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) -install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) -install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} ) -install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} ) +install ( TARGETS "roctx64" LIBRARY DESTINATION ${DEST_NAME}/lib ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES 
${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} ) ## KFD wrapper -install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) +install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION ${DEST_NAME}/lib ) ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )