diff --git a/projects/roctracer/CMakeLists.txt b/projects/roctracer/CMakeLists.txt index 04136e0081..e90a4f7924 100644 --- a/projects/roctracer/CMakeLists.txt +++ b/projects/roctracer/CMakeLists.txt @@ -109,6 +109,11 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) +## KFD wrapper +if ( DEFINED KFD_WRAPPER ) + install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) +endif () + ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" ) diff --git a/projects/roctracer/build.sh b/projects/roctracer/build.sh index f0bdaf473b..b94e58da94 100755 --- a/projects/roctracer/build.sh +++ b/projects/roctracer/build.sh @@ -17,6 +17,8 @@ if [ -z "$HCC_HOME" ] ; then export HCC_HOME="$ROCM_PATH/hcc"; fi if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi +if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi +if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) MAKE_OPTS="-j 8 -C $BUILD_DIR" @@ -27,10 +29,11 @@ pushd $BUILD_DIR cmake \ -DCMAKE_MODULE_PATH=$ROCTRACER_ROOT/cmake_modules \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ - -DCMAKE_PREFIX_PATH="$PACKAGE_ROOT" \ + -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \ -DCPACK_GENERATOR="DEB;RPM" \ + $HIP_VDI_OPT \ $ROCTRACER_ROOT make make mytest diff --git a/projects/roctracer/cmake_modules/env.cmake b/projects/roctracer/cmake_modules/env.cmake index fbeccf5d09..9ad3fbf23b 100644 --- a/projects/roctracer/cmake_modules/env.cmake +++ b/projects/roctracer/cmake_modules/env.cmake @@ -43,6 +43,18 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) add_definitions ( -DDEBUG_TRACE=1 ) endif() +## Enable HIP_VDI mode +if ( DEFINED HIP_VDI ) + add_definitions ( -DHIP_VDI=${HIP_VDI} ) +else() + set ( HIP_VDI 0 ) +endif() + +## Enable KFD wrapper +if ( DEFINED KFD_WRAPPER ) + add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} ) +endif() + ## Enable HIP/HCC local build if ( DEFINED LOCAL_BUILD ) add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} ) @@ -123,5 +135,7 @@ message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" ) message ( "-------------HCC-Inc: ${HCC_INC_DIR}" ) message ( "-------------HIP-Inc: ${HIP_INC_DIR}" ) message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" ) +message ( "-------------HIP-VDI: ${HIP_VDI}" ) +message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" ) message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" ) message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" ) diff --git a/projects/roctracer/inc/ext/prof_protocol.h b/projects/roctracer/inc/ext/prof_protocol.h index d6e08ca0f3..6d9cd62714 100644 --- a/projects/roctracer/inc/ext/prof_protocol.h +++ b/projects/roctracer/inc/ext/prof_protocol.h @@ -31,6 +31,7 @@ typedef enum { ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain @@ -83,7 +84,7 @@ struct activity_record_t { }; // Activity sync calback type -typedef activity_record_t* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); +typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); // Activity async calback type typedef void (*activity_id_callback_t)(activity_correlation_id_t id); typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); diff --git a/projects/roctracer/inc/roctracer_ext.h b/projects/roctracer/inc/roctracer_ext.h index 6a1edb9af3..c2f5c54542 100644 --- a/projects/roctracer/inc/roctracer_ext.h +++ b/projects/roctracer/inc/roctracer_ext.h @@ -34,13 +34,25 @@ THE SOFTWARE. #include "roctracer.h" +typedef void (*roctracer_start_cb_t)(); +typedef void (*roctracer_stop_cb_t)(); +typedef struct { + roctracer_start_cb_t start_cb; + roctracer_stop_cb_t stop_cb; +} roctracer_ext_properties_t; + #ifdef __cplusplus extern "C" { #endif // __cplusplus //////////////////////////////////////////////////////////////////////////////// -// Mark API -void roctracer_mark(const char* str); +// Application annotatin API + +// Tracing start API +void roctracer_start(); + +// Tracing stop API +void roctracer_stop(); //////////////////////////////////////////////////////////////////////////////// // External correlation id API diff --git a/projects/roctracer/inc/roctracer_hcc.h b/projects/roctracer/inc/roctracer_hcc.h index 7caee7c9ad..252b984d2a 100644 --- a/projects/roctracer/inc/roctracer_hcc.h +++ b/projects/roctracer/inc/roctracer_hcc.h @@ -23,11 +23,26 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_HCC_H_ #define INC_ROCTRACER_HCC_H_ +#if HIP_VDI +#define HIP_OP_ID_NUMBER 3 +#define HIP_OP_ID_COPY 1 +extern "C" { +typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); +typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); +typedef const char* (hipGetOpName_t)(unsigned op); +} +#else // !HIP_VDI #if LOCAL_BUILD #include #else #include #endif +#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER +#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY +typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t; +typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t; +typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t; +#endif // !HIP_VDI #include "roctracer.h" diff --git a/projects/roctracer/inc/roctracer_roctx.h b/projects/roctracer/inc/roctracer_roctx.h index aaa95703db..329e974d61 100644 --- a/projects/roctracer/inc/roctracer_roctx.h +++ b/projects/roctracer/inc/roctracer_roctx.h @@ -77,6 +77,14 @@ bool RegisterApiCallback(uint32_t op, void* callback, void* arg); // Remove ROCTX callback for given opertaion id bool RemoveApiCallback(uint32_t op); +// Iterate range stack to support tracing start/stop +typedef struct { + const char* message; + uint32_t tid; +} roctx_range_data_t; +typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg); +void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg); + #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/projects/roctracer/script/hipap.py b/projects/roctracer/script/hipap.py deleted file mode 100755 index 829a2213c4..0000000000 --- a/projects/roctracer/script/hipap.py +++ /dev/null @@ -1,454 +0,0 @@ -#!/usr/bin/python -import os, sys, re - -verbose = 0 -PROF_HEADER = "hip_prof_str.h" -OUTPUT = PROF_HEADER -REC_MAX_LEN = 1024 - -# Fatal error termination -inp_file = 'none' -line_num = -1 -def fatal(msg): - if line_num != -1: - print >>sys.stderr, "Error: " + msg + ", file '" + inp_file + "', line (" + str(line_num) + ")" - else: - print >>sys.stderr, "Error: " + msg - sys.exit(1) - -# Verbose message -def message(msg): - if verbose: print >>sys.stdout, msg - -############################################################# -# Normalizing API arguments -def filtr_api_args(args_str): - args_str = re.sub(r'^\s*', r'', args_str); - args_str = re.sub(r'\s*$', r'', args_str); - args_str = re.sub(r'\s*,\s*', r',', args_str); - args_str = re.sub(r'\s+', r' ', args_str); - args_str = re.sub(r'void \*', r'void* ', args_str); - args_str = re.sub(r'(enum|struct) ', '', args_str); - return args_str - -# Normalizing types -def norm_api_types(type_str): - type_str = re.sub(r'uint32_t', r'unsigned int', type_str) - type_str = re.sub(r'^unsigned$', r'unsigned int', type_str) - return type_str - -# Creating a list of arguments [(type, name), ...] -def list_api_args(args_str): - args_str = filtr_api_args(args_str) - args_list = [] - if args_str != '': - for arg_pair in args_str.split(','): - if arg_pair == 'void': continue - arg_pair = re.sub(r'\s*=\s*\S+$','', arg_pair); - m = re.match("^(.*)\s(\S+)$", arg_pair); - if m: - arg_type = norm_api_types(m.group(1)) - arg_name = m.group(2) - args_list.append((arg_type, arg_name)) - else: - fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'") - return args_list; - -# Creating arguments string "type0, type1, ..." -def filtr_api_types(args_str): - args_list = list_api_args(args_str) - types_str = '' - for arg_tuple in args_list: - types_str += arg_tuple[0] + ', ' - return types_str - -# Creating options list [opt0, opt1, ...] -def filtr_api_opts(args_str): - args_list = list_api_args(args_str) - opts_list = [] - for arg_tuple in args_list: - opts_list.append(arg_tuple[1]) - return opts_list -############################################################# -# Parsing API header -# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); -def parse_api(inp_file_p, out): - global inp_file - global line_num - inp_file = inp_file_p - - beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); - api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)"); - end_pattern = re.compile("Texture"); - hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)') - nms_open_pattern = re.compile(r'namespace hip_impl {') - nms_close_pattern = re.compile(r'}') - - inp = open(inp_file, 'r') - - found = 0 - hidden = 0 - nms_level = 0; - record = "" - line_num = -1 - - for line in inp.readlines(): - record += re.sub(r'^\s+', r' ', line[:-1]) - line_num += 1 - - if len(record) > REC_MAX_LEN: - fatal("bad record \"" + record + "\"") - - if beg_pattern.match(record) and (hidden == 0) and (nms_level == 0): found = 1 - - if found != 0: - record = re.sub("\s__dparm\([^\)]*\)", '', record); - m = api_pattern.match(record) - if m: - found = 0 - if end_pattern.search(record): break - out[m.group(2)] = m.group(3) - else: continue - - hidden = 0 - if hidden_pattern.match(line): hidden = 1 - - if nms_open_pattern.match(line): nms_level += 1 - if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1 - if nms_level < 0: - fatal("nms level < 0") - - record = "" - - inp.close() - line_num = -1 -############################################################# -# Patching API implementation -# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) { -# HIP_INIT_CB(hipSetupArgument, arg, size, offset); -# inp_file - input implementation source file -# api_map - input public API map [] => -# out - output map [] => [opt0, opt1, ...] -def parse_content(inp_file_p, api_map, out): - global inp_file - global line_num - inp_file = inp_file_p - - # API definition begin pattern - beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); - # API definition complete pattern - api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{"); - # API init macro pattern - init_pattern = re.compile("^\s*HIP_INIT[_\w]*_API\(([^,]+)(,|\))"); - target_pattern = re.compile("^(\s*HIP_INIT[^\(]*)(_API\()(.*)\);\s*$"); - - # Open input file - inp = open(inp_file, 'r') - - # API name - api_name = "" - # Valid public API found flag - api_valid = 0 - - # Input file patched content - content = '' - # Sub content for found API defiition - sub_content = '' - # Current record, accumulating several API definition related lines - record = '' - # Current input file line number - line_num = -1 - # API beginning found flag - found = 0 - - # Reading input file - for line in inp.readlines(): - # Accumulating record - record += re.sub(r'^\s+', r' ', line[:-1]) - line_num += 1 - - if len(record) > REC_MAX_LEN: - fatal("bad record \"" + record + "\"") - break; - - # Looking for API begin - if beg_pattern.match(record): found = 1 - - # Matching complete API definition - if found == 1: - record = re.sub("\s__dparm\([^\)]*\)", '', record); - m = api_pattern.match(record) - # Checking if complete API matched - if m: - found = 2 - api_name = m.group(2); - # Checking if API name is in the API map - if api_name in api_map: - # Getting API arguments - api_args = m.group(3) - # Getting etalon arguments from the API map - eta_args = api_map[api_name] - if eta_args == '': - eta_args = api_args - api_map[api_name] = eta_args - # Normalizing API arguments - api_types = filtr_api_types(api_args) - # Normalizing etalon arguments - eta_types = filtr_api_types(eta_args) - if api_types == eta_types: - # API is already found - if api_name in out: - fatal("API redefined \"" + api_name + "\", record \"" + record + "\"") - # Set valid public API found flag - api_valid = 1 - # Set output API map with API arguments list - out[api_name] = filtr_api_opts(api_args) - else: - # Warning about mismatched API, possible non public overloaded version - api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types - message("\t" + api_name + ':\n' + api_diff + '\n') - - # API found action - if found == 2: - # Looking for INIT macro - m = init_pattern.match(line) - if m: - found = 0 - if api_valid == 1: - api_valid = 0 - message("\t" + api_name) - else: - # Registering dummy API for non public API if the name in INIT is not NONE - init_name = m.group(1) - # Ignore if it is initialized as NONE - if init_name != 'NONE': - # Check if init name matching API name - if init_name != api_name: - fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") - # If init name is not in public API map then it is private API - # else it was not identified and will be checked on finish - if not init_name in api_map: - if init_name in out: - fatal("API reinit \"" + api_name + "\", record \"" + record + "\"") - out[init_name] = [] - elif re.search('}', line): - found = 0 - # Expect INIT macro for valid public API - if api_valid == 1: - api_valid = 0 - if api_name in out: - del out[api_name] - del api_map[api_name] - out['.' + api_name] = 1 - else: - fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"") - - if found != 1: record = "" - content += line - - inp.close() - line_num = -1 - - if len(out) != 0: - return content - else: - return '' - -# src path walk -def parse_src(api_map, src_path, src_patt, out): - pattern = re.compile(src_patt) - src_path = re.sub(r'\s', '', src_path) - for src_dir in src_path.split(':'): - message("Parsing " + src_dir + " for '" + src_patt + "'") - for root, dirs, files in os.walk(src_dir): - for fnm in files: - if pattern.search(fnm): - file = root + '/' + fnm - message(file) - content = parse_content(file, api_map, out); - if content != '': - f = open(file, 'w') - f.write(content) - f.close() -############################################################# -# Generating profiling primitives header -# api_map - public API map [] => [(type, name), ...] -# opts_map - opts map [] => [opt0, opt1, ...] -def generate_prof_header(f, api_map, opts_map): - # Private API list - priv_lst = [] - - f.write('// automatically generated sources\n') - f.write('#ifndef _HIP_PROF_STR_H\n'); - f.write('#define _HIP_PROF_STR_H\n'); - f.write('#include \n'); - f.write('#include \n'); - - # Generating dummy macro for non-public API - f.write('\n// Dummy API primitives\n') - f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n') - for name in opts_map: - if not name in api_map: - opts_lst = opts_map[name] - if len(opts_lst) != 0: - fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst)) - f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n') - priv_lst.append(name) - - for name in priv_lst: - message("Private: " + name) - - # Generating the callbacks ID enumaration - f.write('\n// HIP API callbacks ID enumaration\n') - f.write('enum hip_api_id_t {\n') - cb_id = 0 - for name in api_map.keys(): - f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') - cb_id += 1 - f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n') - f.write(' HIP_API_ID_ANY = ' + str(cb_id + 1) + ',\n') - f.write('\n') - f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n') - for name in priv_lst: - f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n') - f.write('};\n') - - # Generating the callbacks ID enumaration - f.write('\n// Return HIP API string\n') - f.write('static const char* hip_api_name(const uint32_t& id) {\n') - f.write(' switch(id) {\n') - for name in api_map.keys(): - f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n') - f.write(' };\n') - f.write(' return "unknown";\n') - f.write('};\n') - - # Generating the callbacks data structure - f.write('\n// HIP API callbacks data structure\n') - f.write( - 'struct hip_api_data_t {\n' + - ' uint64_t correlation_id;\n' + - ' uint32_t phase;\n' + - ' union {\n' - ) - for name, args in api_map.items(): - if len(args) != 0: - f.write(' struct {\n') - for arg_tuple in args: - f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n') - f.write(' } ' + name + ';\n') - f.write( - ' } args;\n' + - '};\n' - ) - - # Generating the callbacks args data filling macros - f.write('\n// HIP API callbacks args data filling macros\n') - for name, args in api_map.items(): - f.write('// ' + name + str(args) + '\n') - f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n') - if name in opts_map: - opts_list = opts_map[name] - if len(args) != len(opts_list): - fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list)) - # API args iterating: - # type is args[][0] - # name is args[][1] - for ind in range(0, len(args)): - arg_tuple = args[ind] - fld_name = arg_tuple[1] - arg_name = opts_list[ind] - f.write(' cb_data.args.' + name + '.' + fld_name + ' = ' + arg_name + '; \\\n') - f.write('};\n') - f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') - - # Generating the method for the API string, name and parameters - f.write('\n') - f.write('#if 0\n') - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') - - f.write('#endif // _HIP_PROF_STR_H\n'); - -############################################################# -# main -# Usage -if (len(sys.argv) > 1) and (sys.argv[1] == '-v'): - verbose = 1 - sys.argv.pop(1) - -if (len(sys.argv) < 3): - fatal ("Usage: " + sys.argv[0] + " [-v] \n" + - " -v - verbose messages\n" + - " example:\n" + - " $ hipap.py hip/include/hip/hcc_detail/hip_runtime_api.h hip/src") - -# API header file given as an argument -api_hfile = sys.argv[1] -if not os.path.isfile(api_hfile): - fatal("input file '" + api_hfile + "' not found") - -# Srcs directory given as an argument -src_pat = "\.cpp$" -src_dir = sys.argv[2] -if not os.path.isdir(src_dir): - fatal("src directory " + src_dir + "' not found") - -if len(sys.argv) > 3: OUTPUT = sys.argv[3] - -# API declaration map -api_map = { - 'hipHccModuleLaunchKernel': '' -} -# API options map -opts_map = {} - -# Parsing API header -parse_api(api_hfile, api_map) - -# Parsing sources -parse_src(api_map, src_dir, src_pat, opts_map) - -# Checking for non-conformant APIs -for name in opts_map.keys(): - m = re.match(r'\.(\S*)', name) - if m: - message("Init missing: " + m.group(1)) - del opts_map[name] - -# Converting api map to map of lists -# Checking for not found APIs -not_found = 0 -if len(opts_map) != 0: - for name in api_map.keys(): - args_str = api_map[name]; - api_map[name] = list_api_args(args_str) - if not name in opts_map: - fatal("not found: " + name) - not_found += 1 -if not_found != 0: - fatal(not_found + " API calls not found") - -# Generating output header file -with open(OUTPUT, 'w') as f: - generate_prof_header(f, api_map, opts_map) - -# Successfull exit -sys.exit(0) diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index c4f2bd1b5c..44b9fd81a3 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -12,17 +12,22 @@ set ( LIB_SRC add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) + +# Generating HSA tracing primitives execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" ) -set ( KFD_LIB "kfdwrapper64" ) -set ( KFD_LIB_SRC - ${LIB_DIR}/kfd/kfd_wrapper.cpp -) -execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) -add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) -target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) -target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) -execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) +# Generating KFD/Thunk tracing primitives +if ( DEFINED KFD_WRAPPER ) + set ( KFD_LIB "kfdwrapper64" ) + set ( KFD_LIB_SRC + ${LIB_DIR}/kfd/kfd_wrapper.cpp + ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) + add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) + target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) + target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) + execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) +endif() set ( ROCTX_LIB "roctx64" ) set ( ROCTX_LIB_SRC diff --git a/projects/roctracer/src/core/journal.h b/projects/roctracer/src/core/journal.h new file mode 100644 index 0000000000..f4d8a676b7 --- /dev/null +++ b/projects/roctracer/src/core/journal.h @@ -0,0 +1,102 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef SRC_CORE_JOURNAL_H_ +#define SRC_CORE_JOURNAL_H_ + +#include +#include + +namespace roctracer { + +template +class Journal { + public: + typedef std::mutex mutex_t; + typedef std::map domain_map_t; + typedef std::map journal_map_t; + + struct record_t { + uint32_t domain; + uint32_t op; + Data data; + }; + + Journal() { + domain_mask_ = 0; + map_ = new journal_map_t; + } + + ~Journal() { + for (auto& val : map_) delete val.second; + delete map_; + } + + void registr(const record_t& record) { + std::lock_guard lck(mutex_); + auto* map = get_domain_map(record.domain); + map->insert({record.op, record.data}); + } + + void remove(const record_t& record) { + std::lock_guard lck(mutex_); + auto* map = get_domain_map(record.domain); + map->erase(record.op); + } + + template + F foreach(const F& f_i) { + std::lock_guard lck(mutex_); + F f = f_i; + for (uint32_t domain = 0, mask = domain_mask_; mask != 0; ++domain, mask >>= 1) { + if (mask & 1) { + auto map = get_domain_map(domain); + auto begin = map->begin(); + auto end = map->end(); + for (auto it = begin; it != end; ++it) { + if (f.fun({domain, it->first, it->second}) == false) break; + } + } + } + return f; + } + + private: + domain_map_t* get_domain_map(const uint32_t& domain) { + domain_mask_ |= 1u << domain; + auto domain_it = map_->find(domain); + if (domain_it == map_->end()) { + auto* domain_map = new domain_map_t; + auto ret = map_->insert({domain, domain_map}); + domain_it = ret.first; + } + return domain_it->second; + } + + mutex_t mutex_; + journal_map_t* map_; + uint32_t domain_mask_; +}; + +} // namespace roctracer + +#endif // SRC_CORE_JOURNAL_H_ diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index dfddb11a20..fa2b0e62ad 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -29,12 +29,12 @@ class BaseLoader : public T { return f; } - static inline loader_t& Instance(const bool& preload = false) { + static inline loader_t& Instance() { loader_t* obj = instance_.load(std::memory_order_acquire); if (obj == NULL) { std::lock_guard lck(mutex_); if (instance_.load(std::memory_order_relaxed) == NULL) { - obj = new loader_t(preload); + obj = new loader_t(); instance_.store(obj, std::memory_order_release); } } @@ -45,11 +45,11 @@ class BaseLoader : public T { static void SetLibName(const char *name) { lib_name_ = name; } private: - BaseLoader(bool preload) { - const int flags = (preload) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; + BaseLoader() { + const int flags = RTLD_LAZY; handle_ = dlopen(lib_name_, flags); - if ((handle_ == NULL) && (strong_ld_check_)) { - fprintf(stderr, "roctracer: Loading '%s' failed, preload(%d), %s\n", lib_name_, (int)preload, dlerror()); + if (handle_ == NULL) { + fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); abort(); } dlerror(); @@ -64,7 +64,6 @@ class BaseLoader : public T { static mutex_t mutex_; static const char* lib_name_; static std::atomic instance_; - static const bool strong_ld_check_; void* handle_; }; @@ -99,26 +98,26 @@ class HipApi { }; // HCC runtime library loader class +#include "inc/roctracer_hcc.h" class HccApi { public: typedef BaseLoader Loader; - typedef decltype(Kalmar::CLAMP::InitActivityCallback) InitActivityCallback_t; - typedef decltype(Kalmar::CLAMP::EnableActivityCallback) EnableActivityCallback_t; - typedef decltype(Kalmar::CLAMP::GetCmdName) GetCmdName_t; - - InitActivityCallback_t* InitActivityCallback; - EnableActivityCallback_t* EnableActivityCallback; - GetCmdName_t* GetCmdName; + hipInitAsyncActivityCallback_t* InitActivityCallback; + hipEnableAsyncActivityCallback_t* EnableActivityCallback; + hipGetOpName_t* GetOpName; protected: void init(Loader* loader) { - // Kalmar::CLAMP::InitActivityCallback - InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); - // Kalmar::CLAMP::EnableActivityIdCallback - EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); - // Kalmar::CLAMP::GetCmdName - GetCmdName = loader->GetFun("GetCmdNameImpl"); +#if HIP_VDI + InitActivityCallback = loader->GetFun("InitActivityCallback"); + EnableActivityCallback = loader->GetFun("EnableActivityCallback"); + GetOpName = loader->GetFun("GetCmdName"); +#else + InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); + EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); + GetOpName = loader->GetFun("GetCmdNameImpl"); +#endif } }; @@ -141,20 +140,24 @@ class KfdApi { }; // rocTX runtime library loader class +#include "inc/roctracer_roctx.h" class RocTxApi { public: typedef BaseLoader Loader; - typedef bool (RegisterApiCallback_t)(uint32_t op, void* callback, void* arg); - typedef bool (RemoveApiCallback_t)(uint32_t op); + typedef decltype(RegisterApiCallback) RegisterApiCallback_t; + typedef decltype(RemoveApiCallback) RemoveApiCallback_t; + typedef decltype(RangeStackIterate) RangeStackIterate_t; RegisterApiCallback_t* RegisterApiCallback; RemoveApiCallback_t* RemoveApiCallback; + RangeStackIterate_t* RangeStackIterate; protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("RegisterApiCallback"); RemoveApiCallback = loader->GetFun("RemoveApiCallback"); + RangeStackIterate = loader->GetFun("RangeStackIterate"); } }; @@ -168,11 +171,9 @@ typedef BaseLoader RocTxLoader; #define LOADER_INSTANTIATE() \ template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ - template const bool roctracer::BaseLoader::strong_ld_check_ = false; template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ - template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ - template<> const bool roctracer::RocTxLoader::strong_ld_check_ = false; + template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; #endif // SRC_CORE_LOADER_H_ diff --git a/projects/roctracer/src/core/memory_pool.h b/projects/roctracer/src/core/memory_pool.h new file mode 100644 index 0000000000..fe2f1a6d3a --- /dev/null +++ b/projects/roctracer/src/core/memory_pool.h @@ -0,0 +1,210 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef MEMORY_POOL_H_ +#define MEMORY_POOL_H_ + +#include +#include + +#include +#include + +#include "util/exception.h" + +#define PTHREAD_CALL(call) \ + do { \ + int err = call; \ + if (err != 0) { \ + errno = err; \ + perror(#call); \ + abort(); \ + } \ + } while (0) + +namespace roctracer { + +class MemoryPool { + public: + typedef std::mutex mutex_t; + + static void allocator_default(char** ptr, size_t size, void* arg) { + (void)arg; + if (*ptr == NULL) { + *ptr = reinterpret_cast(malloc(size)); + } else if (size != 0) { + *ptr = reinterpret_cast(realloc(ptr, size)); + } else { + free(*ptr); + *ptr = NULL; + } + } + + MemoryPool(const roctracer_properties_t& properties) { + // Assigning pool allocator + alloc_fun_ = allocator_default; + alloc_arg_ = NULL; + if (properties.alloc_fun != NULL) { + alloc_fun_ = properties.alloc_fun; + alloc_arg_ = properties.alloc_arg; + } + + // Pool definition + buffer_size_ = properties.buffer_size; + const size_t pool_size = 2 * buffer_size_; + pool_begin_ = NULL; + alloc_fun_(&pool_begin_, pool_size, alloc_arg_); + if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed"); + pool_end_ = pool_begin_ + pool_size; + buffer_begin_ = pool_begin_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + + // Consuming read thread + read_callback_fun_ = properties.buffer_callback_fun; + read_callback_arg_ = properties.buffer_callback_arg; + consumer_arg_.set(this, NULL, NULL, true); + PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL)); + PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL)); + PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_)); + } + + ~MemoryPool() { + Flush(); + PTHREAD_CALL(pthread_cancel(consumer_thread_)); + void *res; + PTHREAD_CALL(pthread_join(consumer_thread_, &res)); + if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); + allocator_default(&pool_begin_, 0, alloc_arg_); + } + + template + void Write(const Record& record) { + std::lock_guard lock(write_mutex_); + getRecord(record); + } + + void Flush() { + std::lock_guard lock(write_mutex_); + if (write_ptr_ > buffer_begin_) { + spawn_reader(buffer_begin_, write_ptr_); + sync_reader(&consumer_arg_); + buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + } + } + + private: + struct consumer_arg_t { + MemoryPool* obj; + const char* begin; + const char* end; + volatile std::atomic valid; + void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) { + obj = obj_p; + begin = begin_p; + end = end_p; + valid.store(valid_p); + } + }; + + template + Record* getRecord(const Record& init) { + char* next = write_ptr_ + sizeof(Record); + if (next > buffer_end_) { + if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); + spawn_reader(buffer_begin_, write_ptr_); + buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; + buffer_end_ = buffer_begin_ + buffer_size_; + write_ptr_ = buffer_begin_; + next = write_ptr_ + sizeof(Record); + } + + Record* ptr = reinterpret_cast(write_ptr_); + write_ptr_ = next; + + *ptr = init; + return ptr; + } + + static void reset_reader(consumer_arg_t* arg) { + arg->valid.store(false); + } + + static void sync_reader(const consumer_arg_t* arg) { + while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield()); + } + + static void* reader_fun(void* consumer_arg) { + consumer_arg_t* arg = reinterpret_cast(consumer_arg); + roctracer::MemoryPool* obj = arg->obj; + + reset_reader(arg); + + while (1) { + PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_))); + while (arg->valid.load() == false) { + PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_))); + } + + obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_); + reset_reader(arg); + PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_))); + } + + return NULL; + } + + void spawn_reader(const char* data_begin, const char* data_end) { + sync_reader(&consumer_arg_); + PTHREAD_CALL(pthread_mutex_lock(&read_mutex_)); + consumer_arg_.set(this, data_begin, data_end, true); + PTHREAD_CALL(pthread_cond_signal(&read_cond_)); + PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_)); + } + + // pool allocator + roctracer_allocator_t alloc_fun_; + void* alloc_arg_; + + // Pool definition + size_t buffer_size_; + char* pool_begin_; + char* pool_end_; + char* buffer_begin_; + char* buffer_end_; + char* write_ptr_; + mutex_t write_mutex_; + + // Consuming read thread + roctracer_buffer_callback_t read_callback_fun_; + void* read_callback_arg_; + consumer_arg_t consumer_arg_; + pthread_t consumer_thread_; + pthread_mutex_t read_mutex_; + pthread_cond_t read_cond_; +}; + +} // namespace roctracer + +#endif // MEMORY_POOL_H_ diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index f9855e663e..1ae25fb885 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -23,21 +23,27 @@ THE SOFTWARE. #include "inc/roctracer.h" #include "inc/roctracer_hcc.h" #include "inc/roctracer_hip.h" +#include "inc/roctracer_ext.h" #include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" +#ifdef KFD_WRAPPER #include "inc/roctracer_kfd.h" +#endif + +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include +#include "core/journal.h" #include "core/loader.h" +#include "core/memory_pool.h" #include "core/trace_buffer.h" #include "proxy/tracker.h" #include "ext/hsa_rt_utils.hpp" @@ -54,16 +60,6 @@ THE SOFTWARE. #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) -#define PTHREAD_CALL(call) \ - do { \ - int err = call; \ - if (err != 0) { \ - errno = err; \ - perror(#call); \ - abort(); \ - } \ - } while (0) - #define HIPAPI_CALL(call) \ do { \ hipError_t err = call; \ @@ -96,7 +92,6 @@ THE SOFTWARE. #endif static inline uint32_t GetPid() { return syscall(__NR_getpid); } -static inline uint32_t GetTid() { return syscall(__NR_gettid); } /////////////////////////////////////////////////////////////////////////////////////////////////// // Mark callback @@ -177,11 +172,52 @@ namespace roctracer { decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn; decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; +typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t; +typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t; +typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t; +typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t; + +struct cb_journal_data_t { + roctracer_rtapi_callback_t callback; + void* user_data; +}; +typedef Journal CbJournal; +CbJournal* cb_journal; + +struct act_journal_data_t { + roctracer_pool_t* pool; +}; +typedef Journal ActJournal; +ActJournal* act_journal; + +template +struct journal_functor_t { + typedef typename T::record_t record_t; + F f_; + journal_functor_t(F f) : f_(f) {} + bool fun(const record_t& record) { + f_((activity_domain_t)record.domain, record.op); + return true; + } +}; +typedef journal_functor_t cb_en_functor_t; +typedef journal_functor_t cb_dis_functor_t; +typedef journal_functor_t act_en_functor_t; +typedef journal_functor_t act_dis_functor_t; +template<> bool cb_en_functor_t::fun(const cb_en_functor_t::record_t& record) { + f_((activity_domain_t)record.domain, record.op, record.data.callback, record.data.user_data); + return true; +} +template<> bool act_en_functor_t::fun(const act_en_functor_t::record_t& record) { + f_((activity_domain_t)record.domain, record.op, record.data.pool); + return true; +} + void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry); void hsa_kernel_handler(::proxy::Tracker::entry_t* entry); TraceBuffer::flush_prm_t trace_buffer_prm[] = { - {roctracer::COPY_ENTRY_TYPE, hsa_async_copy_handler}, - {roctracer::KERNEL_ENTRY_TYPE, hsa_kernel_handler} + {COPY_ENTRY_TYPE, hsa_async_copy_handler}, + {KERNEL_ENTRY_TYPE, hsa_kernel_handler} }; TraceBuffer trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2); @@ -199,7 +235,12 @@ CoreApiTable CoreApiTable_saved{}; AmdExtTable AmdExtTable_saved{}; // Table of function pointers to HSA Image Extension ImageExtTable ImageExtTable_saved{}; -} +} // namespace hsa_support + +namespace ext_support { +roctracer_start_cb_t roctracer_start_cb = NULL; +roctracer_stop_cb_t roctracer_stop_cb = NULL; +} // namespace ext_suppoprt roctracer_status_t GetExcStatus(const std::exception& e) { const util::exception* roctracer_exc_ptr = dynamic_cast(&e); @@ -223,167 +264,19 @@ class GlobalCounter { GlobalCounter::mutex_t GlobalCounter::mutex_; GlobalCounter::counter_t GlobalCounter::counter_ = 0; -class MemoryPool { - public: - typedef std::mutex mutex_t; - - static void allocator_default(char** ptr, size_t size, void* arg) { - (void)arg; - if (*ptr == NULL) { - *ptr = reinterpret_cast(malloc(size)); - } else if (size != 0) { - *ptr = reinterpret_cast(realloc(ptr, size)); - } else { - free(*ptr); - *ptr = NULL; - } - } - - MemoryPool(const roctracer_properties_t& properties) { - // Assigning pool allocator - alloc_fun_ = allocator_default; - alloc_arg_ = NULL; - if (properties.alloc_fun != NULL) { - alloc_fun_ = properties.alloc_fun; - alloc_arg_ = properties.alloc_arg; - } - - // Pool definition - buffer_size_ = properties.buffer_size; - const size_t pool_size = 2 * buffer_size_; - pool_begin_ = NULL; - alloc_fun_(&pool_begin_, pool_size, alloc_arg_); - if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed"); - pool_end_ = pool_begin_ + pool_size; - buffer_begin_ = pool_begin_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - - // Consuming read thread - read_callback_fun_ = properties.buffer_callback_fun; - read_callback_arg_ = properties.buffer_callback_arg; - consumer_arg_.set(this, NULL, NULL, true); - PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL)); - PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL)); - PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_)); - } - - ~MemoryPool() { - Flush(); - PTHREAD_CALL(pthread_cancel(consumer_thread_)); - void *res; - PTHREAD_CALL(pthread_join(consumer_thread_, &res)); - if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); - allocator_default(&pool_begin_, 0, alloc_arg_); - } - - template - void Write(const Record& record) { - std::lock_guard lock(write_mutex_); - getRecord(record); - } - - void Flush() { - std::lock_guard lock(write_mutex_); - if (write_ptr_ > buffer_begin_) { - spawn_reader(buffer_begin_, write_ptr_); - sync_reader(&consumer_arg_); - buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - } - } - - private: - struct consumer_arg_t { - MemoryPool* obj; - const char* begin; - const char* end; - volatile std::atomic valid; - void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) { - obj = obj_p; - begin = begin_p; - end = end_p; - valid.store(valid_p); - } +// Records storage +struct roctracer_api_data_t { + union { + hip_api_data_t hip; }; - - template - Record* getRecord(const Record& init) { - char* next = write_ptr_ + sizeof(Record); - if (next > buffer_end_) { - if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); - spawn_reader(buffer_begin_, write_ptr_); - buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; - buffer_end_ = buffer_begin_ + buffer_size_; - write_ptr_ = buffer_begin_; - next = write_ptr_ + sizeof(Record); - } - - Record* ptr = reinterpret_cast(write_ptr_); - write_ptr_ = next; - - *ptr = init; - return ptr; - } - - static void reset_reader(consumer_arg_t* arg) { - arg->valid.store(false); - } - - static void sync_reader(const consumer_arg_t* arg) { - while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield()); - } - - static void* reader_fun(void* consumer_arg) { - consumer_arg_t* arg = reinterpret_cast(consumer_arg); - roctracer::MemoryPool* obj = arg->obj; - - reset_reader(arg); - - while (1) { - PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_))); - while (arg->valid.load() == false) { - PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_))); - } - - obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_); - reset_reader(arg); - PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_))); - } - - return NULL; - } - - void spawn_reader(const char* data_begin, const char* data_end) { - sync_reader(&consumer_arg_); - PTHREAD_CALL(pthread_mutex_lock(&read_mutex_)); - consumer_arg_.set(this, data_begin, data_end, true); - PTHREAD_CALL(pthread_cond_signal(&read_cond_)); - PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_)); - } - - // pool allocator - roctracer_allocator_t alloc_fun_; - void* alloc_arg_; - - // Pool definition - size_t buffer_size_; - char* pool_begin_; - char* pool_end_; - char* buffer_begin_; - char* buffer_end_; - char* write_ptr_; - mutex_t write_mutex_; - - // Consuming read thread - roctracer_buffer_callback_t read_callback_fun_; - void* read_callback_arg_; - consumer_arg_t consumer_arg_; - pthread_t consumer_thread_; - pthread_mutex_t read_mutex_; - pthread_cond_t read_cond_; + roctracer_api_data_t() {}; }; +struct record_pair_t { + roctracer_record_t record; + roctracer_api_data_t data; + record_pair_t() {}; +}; +static thread_local std::stack record_pair_stack; // Correlation id storage static thread_local activity_correlation_id_t correlation_id_tls = 0; @@ -407,7 +300,7 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre return it->second; } -roctracer_record_t* HIP_SyncActivityCallback( +void* HIP_SyncActivityCallback( uint32_t op_id, roctracer_record_t* record, const void* callback_data, @@ -416,23 +309,57 @@ roctracer_record_t* HIP_SyncActivityCallback( static hsa_rt_utils::Timer timer; const hip_api_data_t* data = reinterpret_cast(callback_data); + hip_api_data_t* data_ptr = const_cast(data); MemoryPool* pool = reinterpret_cast(arg); - if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL"); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { + + int phase = ACTIVITY_API_PHASE_ENTER; + if (record != NULL) { + if (data == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback: data is NULL"); + phase = data->phase; + } else if (pool != NULL) { + phase = ACTIVITY_API_PHASE_EXIT; + } + + if (phase == ACTIVITY_API_PHASE_ENTER) { + // Allocating a record if NULL passed + if (record == NULL) { + if (data != NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: record is NULL"); + record_pair_stack.push({}); + auto& top = record_pair_stack.top(); + record = &(top.record); + data = &(top.data.hip); + data_ptr = const_cast(data); + data_ptr->phase = phase; + } + + // Filing record info record->domain = ACTIVITY_DOMAIN_HIP_API; record->op = op_id; record->begin_ns = timer.timestamp_ns(); + // Correlation ID generating uint64_t correlation_id = data->correlation_id; if (correlation_id == 0) { correlation_id = GlobalCounter::Increment(); - const_cast(data)->correlation_id = correlation_id; + data_ptr->correlation_id = correlation_id; } record->correlation_id = correlation_id; + // Passing correlatin ID correlation_id_tls = correlation_id; - return record; + + return data_ptr; } else { + if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: pool is NULL"); + + // Getting record of stacked + if (record == NULL) { + if (record_pair_stack.empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty"); + auto& top = record_pair_stack.top(); + record = &(top.record); + } + + // Filing record info record->end_ns = timer.timestamp_ns(); record->process_id = syscall(__NR_getpid); record->thread_id = syscall(__NR_gettid); @@ -446,9 +373,15 @@ roctracer_record_t* HIP_SyncActivityCallback( pool->Write(ext_record); } + // Writing record to the buffer pool->Write(*record); + + // popping the record entry + if (!record_pair_stack.empty()) record_pair_stack.pop(); + // Clearing correlatin ID correlation_id_tls = 0; + return NULL; } } @@ -627,17 +560,19 @@ PUBLIC_API const char* roctracer_op_string( break; } case ACTIVITY_DOMAIN_HCC_OPS: { - return roctracer::HccLoader::Instance().GetCmdName(kind); + return roctracer::HccLoader::Instance().GetOpName(kind); break; } case ACTIVITY_DOMAIN_HIP_API: { return roctracer::HipLoader::Instance().ApiName(op); break; } +#if KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { return roctracer::kfd_support::GetApiName(op); break; } +#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -658,11 +593,13 @@ PUBLIC_API roctracer_status_t roctracer_op_code( if (kind != NULL) *kind = 0; break; } +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { *op = roctracer::kfd_support::GetApiCode(str); if (kind != NULL) *kind = 0; break; } +#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")"); } @@ -673,9 +610,11 @@ static inline uint32_t get_op_num(const uint32_t& domain) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: return 1; case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; - case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER; + case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; +#endif case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: @@ -685,18 +624,20 @@ static inline uint32_t get_op_num(const uint32_t& domain) { } // Enable runtime API callbacks -static void roctracer_enable_callback_impl( - uint32_t domain, +static roctracer_status_t roctracer_enable_callback_fun( + roctracer_domain_t domain, uint32_t op, roctracer_rtapi_callback_t callback, void* user_data) { switch (domain) { +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); break; } +#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { roctracer::hsa_support::cb_table.set(op, callback, user_data); @@ -718,6 +659,17 @@ static void roctracer_enable_callback_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_enable_callback_impl( + uint32_t domain, + uint32_t op, + roctracer_rtapi_callback_t callback, + void* user_data) +{ + roctracer::cb_journal->registr({domain, op, {callback, user_data}}); + roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); } PUBLIC_API roctracer_status_t roctracer_enable_op_callback( @@ -755,16 +707,18 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback( } // Disable runtime API callbacks -static void roctracer_disable_callback_impl( - uint32_t domain, +static roctracer_status_t roctracer_disable_callback_fun( + roctracer_domain_t domain, uint32_t op) { switch (domain) { +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error"); break; } +#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; @@ -783,6 +737,15 @@ static void roctracer_disable_callback_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_disable_callback_impl( + uint32_t domain, + uint32_t op) +{ + roctracer::cb_journal->remove({domain, op, {}}); + roctracer_disable_callback_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_callback( @@ -850,8 +813,8 @@ PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) { } // Enable activity records logging -static void roctracer_enable_activity_impl( - uint32_t domain, +static roctracer_status_t roctracer_enable_activity_fun( + roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) { @@ -883,6 +846,16 @@ static void roctracer_enable_activity_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_enable_activity_impl( + uint32_t domain, + uint32_t op, + roctracer_pool_t* pool) +{ + roctracer::act_journal->registr({domain, op, {pool}}); + roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool); } PUBLIC_API roctracer_status_t roctracer_enable_op_activity( @@ -917,8 +890,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity( } // Disable activity records logging -static void roctracer_disable_activity_impl( - uint32_t domain, +static roctracer_status_t roctracer_disable_activity_fun( + roctracer_domain_t domain, uint32_t op) { switch (domain) { @@ -943,6 +916,15 @@ static void roctracer_disable_activity_impl( default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } + return ROCTRACER_STATUS_SUCCESS; +} + +static void roctracer_disable_activity_impl( + uint32_t domain, + uint32_t op) +{ + roctracer::act_journal->remove({domain, op, {}}); + roctracer_disable_activity_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_activity( @@ -1011,10 +993,24 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act // Mark API PUBLIC_API void roctracer_mark(const char* str) { - if (mark_api_callback_ptr) { - mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL); - roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id - } + if (mark_api_callback_ptr) { + mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL); + roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id + } +} + +// Start API +PUBLIC_API void roctracer_start() { + if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); + roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); +} + +// Stop API +PUBLIC_API void roctracer_stop() { + roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); + if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); } // Set properties @@ -1032,11 +1028,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; roctracer::hsa_support::output_prefix = ops_properties->output_prefix; +#if 0 // HSA dispatches intercepting rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); rocprofiler::ProxyQueue::HsaIntercept(table); rocprofiler::InterceptQueue::HsaIntercept(table); +#endif // HSA async-copy tracing hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); @@ -1048,10 +1046,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( break; } +#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { roctracer::kfd_support::intercept_KFDApiTable(); break; } +#endif case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); @@ -1062,9 +1062,19 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( } case ACTIVITY_DOMAIN_HCC_OPS: case ACTIVITY_DOMAIN_HIP_API: { - const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB"); - if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name); +#ifdef HIP_VDI + const char* hip_lib_name = "libamdhip64.so"; + roctracer::HccLoader::SetLibName(hip_lib_name); + roctracer::HipLoader::SetLibName(hip_lib_name); +#endif mark_api_callback_ptr = reinterpret_cast(properties); + break; + } + case ACTIVITY_DOMAIN_EXT_API: { + roctracer_ext_properties_t* ops_properties = reinterpret_cast(properties); + roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb; + roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb; + break; } default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); @@ -1113,6 +1123,8 @@ PUBLIC_API void OnUnload() { CONSTRUCTOR_API void constructor() { if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); } roctracer::util::Logger::Create(); + if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; + if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); } } diff --git a/projects/roctracer/src/roctx/roctx.cpp b/projects/roctracer/src/roctx/roctx.cpp index 61916f146a..fcf379f0fe 100644 --- a/projects/roctracer/src/roctx/roctx.cpp +++ b/projects/roctracer/src/roctx/roctx.cpp @@ -24,11 +24,13 @@ THE SOFTWARE. #include "inc/roctracer_roctx.h" #include +#include +#include +#include #include "inc/ext/prof_protocol.h" #include "util/exception.h" #include "util/logger.h" -#include #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) @@ -62,12 +64,8 @@ THE SOFTWARE. (void)err; \ return X; -static thread_local std::stack message_stack; - -#if 0 -static inline uint32_t GetPid() { return syscall(__NR_getpid); } -static inline uint32_t GetTid() { return syscall(__NR_gettid); } -#endif +inline uint32_t GetPid() { return syscall(__NR_getpid); } +inline uint32_t GetTid() { return syscall(__NR_gettid); } //////////////////////////////////////////////////////////////////////////////// // Library errors enumaration @@ -80,12 +78,27 @@ typedef enum { // Library implementation // namespace roctx { +typedef std::stack message_stack_t; +typedef std::map thread_map_t; +typedef std::mutex map_mutex_t; +map_mutex_t map_mutex; +thread_map_t* thread_map = NULL; +static thread_local message_stack_t* message_stack = NULL; roctx_status_t GetExcStatus(const std::exception& e) { const roctracer::util::exception* roctx_exc_ptr = dynamic_cast(&e); return (roctx_exc_ptr) ? static_cast(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR; } +void thread_data_init() { + message_stack = new message_stack_t; + const auto tid = GetTid(); + + std::lock_guard lck(map_mutex); + if (thread_map == NULL) thread_map = new thread_map_t; + (*thread_map)[tid] = message_stack; +} + // callbacks table extern cb_table_t cb_table; } // namespace roctx @@ -119,31 +132,50 @@ PUBLIC_API void roctxMarkA(const char* message) { PUBLIC_API int roctxRangePushA(const char* message) { API_METHOD_PREFIX + if (roctx::message_stack == NULL) roctx::thread_data_init(); + roctx_api_data_t api_data{}; api_data.args.roctxRangePushA.message = strdup(message); activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg); if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg); - message_stack.push(strdup(message)); + roctx::message_stack->push(strdup(message)); + + return roctx::message_stack->size() - 1; API_METHOD_CATCH(-1); - return message_stack.size()-1; } PUBLIC_API int roctxRangePop() { API_METHOD_PREFIX + if (roctx::message_stack == NULL) roctx::thread_data_init(); + roctx_api_data_t api_data{}; activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg); if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg); - if (message_stack.empty()) { + if (roctx::message_stack->empty()) { EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!"); } else { - message_stack.pop(); + roctx::message_stack->pop(); } + + return roctx::message_stack->size(); API_METHOD_CATCH(-1) - return message_stack.size(); +} + +PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) { + for (const auto& entry : *roctx::thread_map) { + const auto tid = entry.first; + for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){ + std::string message = stack.top(); + roctx_range_data_t data{}; + data.message = message.c_str(); + data.tid = tid; + callback(&data, arg); + } + } } } // extern "C" diff --git a/projects/roctracer/test/CMakeLists.txt b/projects/roctracer/test/CMakeLists.txt index d794c44c39..3be85fc399 100644 --- a/projects/roctracer/test/CMakeLists.txt +++ b/projects/roctracer/test/CMakeLists.txt @@ -33,7 +33,7 @@ set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) add_custom_target( mytest COMMAND make -C "${TEST_DIR}/MatrixTranspose" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test" - COMMAND make -C "${TEST_DIR}/MatrixTranspose_test" + COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test" COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test" ) diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp index 240723cfe4..d2ecfb8484 100644 --- a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp @@ -36,6 +36,10 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Y 4 #define THREADS_PER_BLOCK_Z 1 +// Mark API +extern "C" +void roctracer_mark(const char* str); + // Device (Kernel) function, it must be void __global__ void matrixTranspose(float* out, float* in, const int width) { int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -82,6 +86,10 @@ int main() { hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + uint32_t iterations = 100; + while (iterations-- > 0) { + std::cout << "## Iteration (" << iterations << ") #################" << std::endl; + // Memory transfer from host to device hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); @@ -112,6 +120,8 @@ int main() { printf("PASSED!\n"); } + } + // free the resources on device side hipFree(gpuMatrix); hipFree(gpuTransposeMatrix); diff --git a/projects/roctracer/test/MatrixTranspose_test/Makefile b/projects/roctracer/test/MatrixTranspose_test/Makefile index 33538902f8..202980b804 100644 --- a/projects/roctracer/test/MatrixTranspose_test/Makefile +++ b/projects/roctracer/test/MatrixTranspose_test/Makefile @@ -3,6 +3,7 @@ LIB_PATH = $(ROOT_PATH)/build ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64 HSA_KMT_INC_PATH ?= /opt/rocm/include export LD_LIBRARY_PATH=$(LIB_PATH) +HIP_VDI ?= 0 ITERATIONS ?= 100 HIP_PATH?= $(wildcard /opt/rocm/hip) @@ -24,7 +25,7 @@ EXECUTABLE=./MatrixTranspose all: clean $(EXECUTABLE) -CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS) +CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) diff --git a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp index c8c835bd54..e4eb690802 100644 --- a/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -35,7 +35,7 @@ THE SOFTWARE. #include #ifndef ITERATIONS -# define ITERATIONS 100 +# define ITERATIONS 101 #endif #define WIDTH 1024 @@ -167,9 +167,9 @@ int main() { free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix); + } stop_tracing(); - } return errors; } @@ -285,7 +285,7 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->device_id, record->queue_id ); - if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { fprintf(stdout, " external_id(%lu)", record->external_id @@ -302,31 +302,39 @@ void activity_callback(const char* begin, const char* end, void* arg) { // Init tracing routine void init_tracing() { - std::cout << "# START #############################" << std::endl << std::flush; + std::cout << "# INIT #############################" << std::endl << std::flush; + // roctracer properties + roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL); // Allocating tracing pool roctracer_properties_t properties{}; properties.buffer_size = 0x1000; properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); + // Enable HIP API callbacks + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL)); + // Enable HIP activity tracing + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); } // Start tracing routine void start_tracing() { - std::cout << "# START #############################" << std::endl << std::flush; - // Enable HIP API callbacks - ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL)); - // Enable HIP activity tracing - ROCTRACER_CALL(roctracer_enable_activity()); + std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush; + // Start + if ((iterations & 1) == 1) roctracer_start(); + else roctracer_stop(); } // Stop tracing routine void stop_tracing() { - ROCTRACER_CALL(roctracer_disable_callback()); - ROCTRACER_CALL(roctracer_disable_activity()); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); ROCTRACER_CALL(roctracer_flush_activity()); std::cout << "# STOP #############################" << std::endl << std::flush; } #else +void init_tracing() {} void start_tracing() {} void stop_tracing() {} #endif diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index 6840aacc63..af56efe5f3 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -33,6 +33,12 @@ if [ -n "$1" ] ; then test_filter=$1 fi +# debugger +debugger="" +if [ -n "$2" ] ; then + debugger=$2 +fi + # test check routin test_status=0 test_runnum=0 @@ -46,7 +52,7 @@ eval_test() { if [ $test_filter = -1 -o $test_filter = $test_number ] ; then echo "$label: \"$cmdline\"" test_runnum=$((test_runnum + 1)) - eval "$cmdline" + eval "$debugger $cmdline" if [ $? != 0 ] ; then echo "$label: FAILED" test_status=$(($test_status + 1)) @@ -59,7 +65,7 @@ eval_test() { # Standalone test # rocTrecer is used explicitely by test -eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" +eval_test "standalone HIP test" "./test/MatrixTranspose_test" # Tool test # rocTracer/tool is loaded by HSA runtime @@ -68,6 +74,8 @@ export ROCTRACER_DOMAIN="hip" # HIP test eval_test "tool HIP test" ./test/MatrixTranspose +# with trace sampling control +eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" # HSA test export ROCTRACER_DOMAIN="hsa" diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 15d24c9a82..71da132baa 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -25,13 +25,21 @@ THE SOFTWARE. #include /* names denangle */ #include +#include #include -#include /* For SYS_xxx definitions */ +#include +#include /* SYS_xxx definitions */ +#include +#include /* usleep */ +#include +#include #include #include #include +#ifdef KFD_WRAPPER #include +#endif #include #include #include @@ -60,14 +68,17 @@ hsa_rt_utils::Timer* timer = NULL; thread_local timestamp_t hsa_begin_timestamp = 0; thread_local timestamp_t hip_begin_timestamp = 0; thread_local timestamp_t kfd_begin_timestamp = 0; +bool trace_roctx = false; bool trace_hsa_api = false; bool trace_hsa_activity = false; -bool trace_hip = false; +bool trace_hip_api = false; +bool trace_hip_activity = false; bool trace_kfd = false; LOADER_INSTANTIATE(); // Global output file handle +FILE* roctx_file_handle = NULL; FILE* hsa_api_file_handle = NULL; FILE* hsa_async_copy_file_handle = NULL; FILE* hip_api_file_handle = NULL; @@ -79,6 +90,7 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler void fatal(const std::string msg) { + fflush(roctx_file_handle); fflush(hsa_api_file_handle); fflush(hsa_async_copy_file_handle); fflush(hip_api_file_handle); @@ -90,24 +102,6 @@ void fatal(const std::string msg) { abort(); } -// KFD API callback function -void kfd_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ - (void)arg; - const kfd_api_data_t* data = reinterpret_cast(callback_data); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { - kfd_begin_timestamp = timer->timestamp_fn_ns(); - } else { - const timestamp_t end_timestamp = timer->timestamp_fn_ns(); - std::ostringstream os; - os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); - fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); - } -} // C++ symbol demangle static inline const char* cxx_demangle(const char* symbol) { size_t funcnamesize; @@ -116,6 +110,110 @@ static inline const char* cxx_demangle(const char* symbol) { return (ret != NULL) ? ret : symbol; } +// Tracing control thread +uint32_t control_delay_us = 0; +uint32_t control_len_us = 0; +uint32_t control_dist_us = 0; +void* control_thr_fun(void*) { + const uint32_t delay_sec = control_delay_us / 1000000; + const uint32_t delay_us = control_delay_us % 1000000; + const uint32_t len_sec = control_len_us / 1000000; + const uint32_t len_us = control_len_us % 1000000; + const uint32_t dist_sec = control_dist_us / 1000000; + const uint32_t dist_us = control_dist_us % 1000000; + bool start = true; + + sleep(delay_sec); + usleep(delay_us); + + while (1) { + if (start) { + start = false; + roctracer_start(); + sleep(len_sec); + usleep(len_us); + } else { + start = true; + roctracer_stop(); + sleep(dist_sec); + usleep(dist_us); + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// rocTX annotation tracing + +struct roctx_trace_entry_t { + uint32_t valid; + uint32_t type; + uint32_t cid; + timestamp_t timestamp; + uint32_t pid; + uint32_t tid; + const char* message; +}; + +void roctx_flush_cb(roctx_trace_entry_t* entry); +roctracer::TraceBuffer::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}}; +roctracer::TraceBuffer roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1); + +// rocTX callback function +static inline void roctx_callback_fun( + uint32_t domain, + uint32_t cid, + uint32_t tid, + const char* message) +{ + const timestamp_t timestamp = timer->timestamp_fn_ns(); + roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); + entry->valid = roctracer::TRACE_ENTRY_COMPL; + entry->type = 0; + entry->cid = cid; + entry->timestamp = timestamp; + entry->pid = GetPid(); + entry->tid = tid; + entry->message = (message != NULL) ? strdup(message) : NULL; +} + +void roctx_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const roctx_api_data_t* data = reinterpret_cast(callback_data); + roctx_callback_fun(domain, cid, GetTid(), data->args.message); +} + +// Start/Stop callbacks +void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) { + const bool* is_stop_ptr = (bool*)arg; + const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA; + const char* message = (*is_stop_ptr == true) ? NULL : data->message; + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message); +} +void stop_callback() { + bool is_stop = true; + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); +} +void start_callback() { + bool is_stop = false; + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); +} + +void roctx_flush_cb(roctx_trace_entry_t* entry) { + std::ostringstream os; + os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; + if (entry->message != NULL) os << ":\"" << entry->message << "\""; + else os << ":\"\""; + fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HSA API tracing + struct hsa_api_trace_entry_t { uint32_t valid; uint32_t type; @@ -186,6 +284,9 @@ struct hip_api_trace_entry_t { void* ptr; }; +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HIP API tracing + void hip_api_flush_cb(hip_api_trace_entry_t* entry); roctracer::TraceBuffer::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}}; roctracer::TraceBuffer hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1); @@ -221,8 +322,10 @@ void hip_api_callback( entry->ptr = *(data->args.hipMalloc.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: +#if !HIP_VDI case HIP_API_ID_hipExtModuleLaunchKernel: case HIP_API_ID_hipHccModuleLaunchKernel: +#endif const hipFunction_t f = data->args.hipModuleLaunchKernel.f; if (f != NULL) { entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f)); @@ -289,8 +392,10 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: +#if !HIP_VDI case HIP_API_ID_hipExtModuleLaunchKernel: case HIP_API_ID_hipHccModuleLaunchKernel: +#endif fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(), cxx_demangle(entry->name), @@ -328,6 +433,32 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// KFD API tracing + +// KFD API callback function +#ifdef KFD_WRAPPER +void kfd_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const kfd_api_data_t* data = reinterpret_cast(callback_data); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + kfd_begin_timestamp = timer->timestamp_fn_ns(); + } else { + const timestamp_t end_timestamp = timer->timestamp_fn_ns(); + std::ostringstream os; + os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); + fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); + } +} +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////////// + // Input parser std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) { const std::string space_chars_set = " \t"; @@ -401,24 +532,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); } timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); - // API traces switches - const char* trace_domain = getenv("ROCTRACER_DOMAIN"); - if (trace_domain != NULL) { - if (strncmp(trace_domain, "hsa", 3) == 0) { - trace_hsa_api = true; - trace_hsa_activity = true; - } - if (strncmp(trace_domain, "hip", 3) == 0) { - trace_hip = true; - } - if (strncmp(trace_domain, "sys", 3) == 0) { - trace_hsa_api = true; - trace_hip = true; - } - } - - trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0); - // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); if (output_prefix != NULL) { @@ -431,11 +544,41 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } + // API traces switches + const char* trace_domain = getenv("ROCTRACER_DOMAIN"); + if (trace_domain != NULL) { + // ROCTX domain + if (std::string(trace_domain).find("roctx") != std::string::npos) { + trace_roctx = true; + } + + // HSA/HIP domains enabling + if (std::string(trace_domain).find("hsa") != std::string::npos) { + trace_hsa_api = true; + trace_hsa_activity = true; + } + if (std::string(trace_domain).find("hip") != std::string::npos) { + trace_hip_api = true; + trace_hip_activity = true; + } + if (std::string(trace_domain).find("sys") != std::string::npos) { + trace_hsa_api = true; + trace_hip_api = true; + trace_hip_activity = true; + } + + // KFD domain enabling + if (std::string(trace_domain).find("kfd") != std::string::npos) { + trace_kfd = true; + } + } + // API trace vector std::vector hsa_api_vec; std::vector kfd_api_vec; printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); + // XML input const char* xml_name = getenv("ROCP_INPUT"); if (xml_name != NULL) { @@ -458,23 +601,28 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, break; } + if (name == "rocTX") { + found = true; + trace_roctx = true; + } if (name == "HSA") { found = true; trace_hsa_api = true; hsa_api_vec = api_vec; } - if (name == "KFD") { - found = true; - trace_kfd = true; - kfd_api_vec = api_vec; - } if (name == "GPU") { found = true; trace_hsa_activity = true; } if (name == "HIP") { found = true; - trace_hip = true; + trace_hip_api = true; + trace_hip_activity = true; + } + if (name == "KFD") { + found = true; + trace_kfd = true; + kfd_api_vec = api_vec; } } @@ -482,7 +630,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } printf("\n"); - // Enable HSA API callbacks + // Disable HIP activity if HSA activity was set + if (trace_hsa_activity == true) trace_hip_activity = false; + + // Enable rpcTX callbacks + if (trace_roctx) { + roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt"); + + // initialize HSA tracing + roctracer_ext_properties_t properties { + start_callback, + stop_callback + }; + roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties); + + fprintf(stdout, " rocTX-trace()\n"); fflush(stdout); + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); + } + + // Enable HSA API callbacks/activity if (trace_hsa_api) { hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); @@ -504,6 +670,76 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } + // Enable HSA GPU activity + if (trace_hsa_activity) { + hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); + + // initialize HSA tracing + roctracer::hsa_ops_properties_t ops_properties { + table, + reinterpret_cast(hsa_activity_callback), + NULL, + output_prefix + }; + roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); + + fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + } + + // Enable HIP API callbacks/activity + if (trace_hip_api || trace_hip_activity) { + hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); + hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); + + fprintf(stdout, " HIP-trace()\n"); fflush(stdout); + // roctracer properties + roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 0x80000; + properties.buffer_callback_fun = hcc_activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + if (trace_hip_api) { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + } + if (trace_hip_activity) { + ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + } + } + + const char* ctrl_str = getenv("ROCP_CTRL_RATE"); + if (ctrl_str != NULL) { + uint32_t ctrl_delay = 0; + uint32_t ctrl_len = 0; + uint32_t ctrl_rate = 0; + int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); + if (ret != 3) { + fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str); + abort(); + } + if (ctrl_len > ctrl_rate) { + fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate); + abort(); + } + control_dist_us = ctrl_rate - ctrl_len; + control_len_us = ctrl_len; + control_delay_us = ctrl_delay; + + fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); + + roctracer_stop(); + + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, control_thr_fun, NULL); + } + +#ifdef KFD_WRAPPER + // Enable KFD API callbacks/activity if (trace_kfd) { kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); // initialize KFD tracing @@ -523,38 +759,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } printf(")\n"); } - if (trace_hsa_activity) { - hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); - - // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties{ - table, - reinterpret_cast(hsa_activity_callback), - NULL, - output_prefix}; - roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); - - fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); - } - - // Enable HIP API callbacks/activity - if (trace_hip) { - hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); - hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); - - fprintf(stdout, " HIP-trace()\n"); fflush(stdout); - // Allocating tracing pool - roctracer_properties_t properties{}; - properties.buffer_size = 0x80000; - properties.buffer_callback_fun = hcc_activity_callback; - ROCTRACER_CALL(roctracer_open_pool(&properties)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); - - roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); - } +#endif if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); @@ -570,6 +775,12 @@ void tool_unload(bool destruct) { is_unloaded = true; roctracer_unload(destruct); + if (trace_roctx) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); + + roctx_trace_buffer.Flush(); + close_output_file(roctx_file_handle); + } if (trace_hsa_api) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); @@ -581,7 +792,7 @@ void tool_unload(bool destruct) { close_output_file(hsa_async_copy_file_handle); } - if (trace_hip) { + if (trace_hip_api || trace_hip_activity) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));