Merge branch 'amd-master' into rkebichi-patch-kfd-1

[ROCm/roctracer commit: 2bcf1c5554]
Этот коммит содержится в:
eshcherb
2019-11-11 19:05:50 -06:00
коммит произвёл GitHub
родитель cbb832cf23 f6f685f8d4
Коммит 69076632d9
20 изменённых файлов: 1000 добавлений и 796 удалений
+5
Просмотреть файл
@@ -109,6 +109,11 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include )
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so )
## KFD wrapper
if ( DEFINED KFD_WRAPPER )
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib )
endif ()
## Packaging directives
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" )
+4 -1
Просмотреть файл
@@ -17,6 +17,8 @@ if [ -z "$HCC_HOME" ] ; then export HCC_HOME="$ROCM_PATH/hcc"; fi
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
MAKE_OPTS="-j 8 -C $BUILD_DIR"
@@ -27,10 +29,11 @@ pushd $BUILD_DIR
cmake \
-DCMAKE_MODULE_PATH=$ROCTRACER_ROOT/cmake_modules \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DCMAKE_PREFIX_PATH="$PACKAGE_ROOT" \
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
-DCPACK_GENERATOR="DEB;RPM" \
$HIP_VDI_OPT \
$ROCTRACER_ROOT
make
make mytest
+14
Просмотреть файл
@@ -43,6 +43,18 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
add_definitions ( -DDEBUG_TRACE=1 )
endif()
## Enable HIP_VDI mode
if ( DEFINED HIP_VDI )
add_definitions ( -DHIP_VDI=${HIP_VDI} )
else()
set ( HIP_VDI 0 )
endif()
## Enable KFD wrapper
if ( DEFINED KFD_WRAPPER )
add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} )
endif()
## Enable HIP/HCC local build
if ( DEFINED LOCAL_BUILD )
add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} )
@@ -123,5 +135,7 @@ message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" )
message ( "-------------HCC-Inc: ${HCC_INC_DIR}" )
message ( "-------------HIP-Inc: ${HIP_INC_DIR}" )
message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" )
message ( "-------------HIP-VDI: ${HIP_VDI}" )
message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" )
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
+2 -1
Просмотреть файл
@@ -31,6 +31,7 @@ typedef enum {
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
@@ -83,7 +84,7 @@ struct activity_record_t {
};
// Activity sync calback type
typedef activity_record_t* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
// Activity async calback type
typedef void (*activity_id_callback_t)(activity_correlation_id_t id);
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
+14 -2
Просмотреть файл
@@ -34,13 +34,25 @@ THE SOFTWARE.
#include "roctracer.h"
typedef void (*roctracer_start_cb_t)();
typedef void (*roctracer_stop_cb_t)();
typedef struct {
roctracer_start_cb_t start_cb;
roctracer_stop_cb_t stop_cb;
} roctracer_ext_properties_t;
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Mark API
void roctracer_mark(const char* str);
// Application annotatin API
// Tracing start API
void roctracer_start();
// Tracing stop API
void roctracer_stop();
////////////////////////////////////////////////////////////////////////////////
// External correlation id API
+15
Просмотреть файл
@@ -23,11 +23,26 @@ THE SOFTWARE.
#ifndef INC_ROCTRACER_HCC_H_
#define INC_ROCTRACER_HCC_H_
#if HIP_VDI
#define HIP_OP_ID_NUMBER 3
#define HIP_OP_ID_COPY 1
extern "C" {
typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
typedef const char* (hipGetOpName_t)(unsigned op);
}
#else // !HIP_VDI
#if LOCAL_BUILD
#include <hc_prof_runtime.h>
#else
#include <hcc/hc_prof_runtime.h>
#endif
#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER
#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY
typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t;
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t;
typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t;
#endif // !HIP_VDI
#include "roctracer.h"
+8
Просмотреть файл
@@ -77,6 +77,14 @@ bool RegisterApiCallback(uint32_t op, void* callback, void* arg);
// Remove ROCTX callback for given opertaion id
bool RemoveApiCallback(uint32_t op);
// Iterate range stack to support tracing start/stop
typedef struct {
const char* message;
uint32_t tid;
} roctx_range_data_t;
typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg);
void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
-454
Просмотреть файл
@@ -1,454 +0,0 @@
#!/usr/bin/python
import os, sys, re
verbose = 0
PROF_HEADER = "hip_prof_str.h"
OUTPUT = PROF_HEADER
REC_MAX_LEN = 1024
# Fatal error termination
inp_file = 'none'
line_num = -1
def fatal(msg):
if line_num != -1:
print >>sys.stderr, "Error: " + msg + ", file '" + inp_file + "', line (" + str(line_num) + ")"
else:
print >>sys.stderr, "Error: " + msg
sys.exit(1)
# Verbose message
def message(msg):
if verbose: print >>sys.stdout, msg
#############################################################
# Normalizing API arguments
def filtr_api_args(args_str):
args_str = re.sub(r'^\s*', r'', args_str);
args_str = re.sub(r'\s*$', r'', args_str);
args_str = re.sub(r'\s*,\s*', r',', args_str);
args_str = re.sub(r'\s+', r' ', args_str);
args_str = re.sub(r'void \*', r'void* ', args_str);
args_str = re.sub(r'(enum|struct) ', '', args_str);
return args_str
# Normalizing types
def norm_api_types(type_str):
type_str = re.sub(r'uint32_t', r'unsigned int', type_str)
type_str = re.sub(r'^unsigned$', r'unsigned int', type_str)
return type_str
# Creating a list of arguments [(type, name), ...]
def list_api_args(args_str):
args_str = filtr_api_args(args_str)
args_list = []
if args_str != '':
for arg_pair in args_str.split(','):
if arg_pair == 'void': continue
arg_pair = re.sub(r'\s*=\s*\S+$','', arg_pair);
m = re.match("^(.*)\s(\S+)$", arg_pair);
if m:
arg_type = norm_api_types(m.group(1))
arg_name = m.group(2)
args_list.append((arg_type, arg_name))
else:
fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'")
return args_list;
# Creating arguments string "type0, type1, ..."
def filtr_api_types(args_str):
args_list = list_api_args(args_str)
types_str = ''
for arg_tuple in args_list:
types_str += arg_tuple[0] + ', '
return types_str
# Creating options list [opt0, opt1, ...]
def filtr_api_opts(args_str):
args_list = list_api_args(args_str)
opts_list = []
for arg_tuple in args_list:
opts_list.append(arg_tuple[1])
return opts_list
#############################################################
# Parsing API header
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
def parse_api(inp_file_p, out):
global inp_file
global line_num
inp_file = inp_file_p
beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\(");
api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)");
end_pattern = re.compile("Texture");
hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)')
nms_open_pattern = re.compile(r'namespace hip_impl {')
nms_close_pattern = re.compile(r'}')
inp = open(inp_file, 'r')
found = 0
hidden = 0
nms_level = 0;
record = ""
line_num = -1
for line in inp.readlines():
record += re.sub(r'^\s+', r' ', line[:-1])
line_num += 1
if len(record) > REC_MAX_LEN:
fatal("bad record \"" + record + "\"")
if beg_pattern.match(record) and (hidden == 0) and (nms_level == 0): found = 1
if found != 0:
record = re.sub("\s__dparm\([^\)]*\)", '', record);
m = api_pattern.match(record)
if m:
found = 0
if end_pattern.search(record): break
out[m.group(2)] = m.group(3)
else: continue
hidden = 0
if hidden_pattern.match(line): hidden = 1
if nms_open_pattern.match(line): nms_level += 1
if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1
if nms_level < 0:
fatal("nms level < 0")
record = ""
inp.close()
line_num = -1
#############################################################
# Patching API implementation
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
# HIP_INIT_CB(hipSetupArgument, arg, size, offset);
# inp_file - input implementation source file
# api_map - input public API map [<api name>] => <api args>
# out - output map [<api name>] => [opt0, opt1, ...]
def parse_content(inp_file_p, api_map, out):
global inp_file
global line_num
inp_file = inp_file_p
# API definition begin pattern
beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\(");
# API definition complete pattern
api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{");
# API init macro pattern
init_pattern = re.compile("^\s*HIP_INIT[_\w]*_API\(([^,]+)(,|\))");
target_pattern = re.compile("^(\s*HIP_INIT[^\(]*)(_API\()(.*)\);\s*$");
# Open input file
inp = open(inp_file, 'r')
# API name
api_name = ""
# Valid public API found flag
api_valid = 0
# Input file patched content
content = ''
# Sub content for found API defiition
sub_content = ''
# Current record, accumulating several API definition related lines
record = ''
# Current input file line number
line_num = -1
# API beginning found flag
found = 0
# Reading input file
for line in inp.readlines():
# Accumulating record
record += re.sub(r'^\s+', r' ', line[:-1])
line_num += 1
if len(record) > REC_MAX_LEN:
fatal("bad record \"" + record + "\"")
break;
# Looking for API begin
if beg_pattern.match(record): found = 1
# Matching complete API definition
if found == 1:
record = re.sub("\s__dparm\([^\)]*\)", '', record);
m = api_pattern.match(record)
# Checking if complete API matched
if m:
found = 2
api_name = m.group(2);
# Checking if API name is in the API map
if api_name in api_map:
# Getting API arguments
api_args = m.group(3)
# Getting etalon arguments from the API map
eta_args = api_map[api_name]
if eta_args == '':
eta_args = api_args
api_map[api_name] = eta_args
# Normalizing API arguments
api_types = filtr_api_types(api_args)
# Normalizing etalon arguments
eta_types = filtr_api_types(eta_args)
if api_types == eta_types:
# API is already found
if api_name in out:
fatal("API redefined \"" + api_name + "\", record \"" + record + "\"")
# Set valid public API found flag
api_valid = 1
# Set output API map with API arguments list
out[api_name] = filtr_api_opts(api_args)
else:
# Warning about mismatched API, possible non public overloaded version
api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types
message("\t" + api_name + ':\n' + api_diff + '\n')
# API found action
if found == 2:
# Looking for INIT macro
m = init_pattern.match(line)
if m:
found = 0
if api_valid == 1:
api_valid = 0
message("\t" + api_name)
else:
# Registering dummy API for non public API if the name in INIT is not NONE
init_name = m.group(1)
# Ignore if it is initialized as NONE
if init_name != 'NONE':
# Check if init name matching API name
if init_name != api_name:
fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'")
# If init name is not in public API map then it is private API
# else it was not identified and will be checked on finish
if not init_name in api_map:
if init_name in out:
fatal("API reinit \"" + api_name + "\", record \"" + record + "\"")
out[init_name] = []
elif re.search('}', line):
found = 0
# Expect INIT macro for valid public API
if api_valid == 1:
api_valid = 0
if api_name in out:
del out[api_name]
del api_map[api_name]
out['.' + api_name] = 1
else:
fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"")
if found != 1: record = ""
content += line
inp.close()
line_num = -1
if len(out) != 0:
return content
else:
return ''
# src path walk
def parse_src(api_map, src_path, src_patt, out):
pattern = re.compile(src_patt)
src_path = re.sub(r'\s', '', src_path)
for src_dir in src_path.split(':'):
message("Parsing " + src_dir + " for '" + src_patt + "'")
for root, dirs, files in os.walk(src_dir):
for fnm in files:
if pattern.search(fnm):
file = root + '/' + fnm
message(file)
content = parse_content(file, api_map, out);
if content != '':
f = open(file, 'w')
f.write(content)
f.close()
#############################################################
# Generating profiling primitives header
# api_map - public API map [<api name>] => [(type, name), ...]
# opts_map - opts map [<api name>] => [opt0, opt1, ...]
def generate_prof_header(f, api_map, opts_map):
# Private API list
priv_lst = []
f.write('// automatically generated sources\n')
f.write('#ifndef _HIP_PROF_STR_H\n');
f.write('#define _HIP_PROF_STR_H\n');
f.write('#include <sstream>\n');
f.write('#include <string>\n');
# Generating dummy macro for non-public API
f.write('\n// Dummy API primitives\n')
f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n')
for name in opts_map:
if not name in api_map:
opts_lst = opts_map[name]
if len(opts_lst) != 0:
fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst))
f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n')
priv_lst.append(name)
for name in priv_lst:
message("Private: " + name)
# Generating the callbacks ID enumaration
f.write('\n// HIP API callbacks ID enumaration\n')
f.write('enum hip_api_id_t {\n')
cb_id = 0
for name in api_map.keys():
f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n')
cb_id += 1
f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n')
f.write(' HIP_API_ID_ANY = ' + str(cb_id + 1) + ',\n')
f.write('\n')
f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n')
for name in priv_lst:
f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n')
f.write('};\n')
# Generating the callbacks ID enumaration
f.write('\n// Return HIP API string\n')
f.write('static const char* hip_api_name(const uint32_t& id) {\n')
f.write(' switch(id) {\n')
for name in api_map.keys():
f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n')
f.write(' };\n')
f.write(' return "unknown";\n')
f.write('};\n')
# Generating the callbacks data structure
f.write('\n// HIP API callbacks data structure\n')
f.write(
'struct hip_api_data_t {\n' +
' uint64_t correlation_id;\n' +
' uint32_t phase;\n' +
' union {\n'
)
for name, args in api_map.items():
if len(args) != 0:
f.write(' struct {\n')
for arg_tuple in args:
f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n')
f.write(' } ' + name + ';\n')
f.write(
' } args;\n' +
'};\n'
)
# Generating the callbacks args data filling macros
f.write('\n// HIP API callbacks args data filling macros\n')
for name, args in api_map.items():
f.write('// ' + name + str(args) + '\n')
f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n')
if name in opts_map:
opts_list = opts_map[name]
if len(args) != len(opts_list):
fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list))
# API args iterating:
# type is args[<ind>][0]
# name is args[<ind>][1]
for ind in range(0, len(args)):
arg_tuple = args[ind]
fld_name = arg_tuple[1]
arg_name = opts_list[ind]
f.write(' cb_data.args.' + name + '.' + fld_name + ' = ' + arg_name + '; \\\n')
f.write('};\n')
f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n')
# Generating the method for the API string, name and parameters
f.write('\n')
f.write('#if 0\n')
f.write('// HIP API string method, method name and parameters\n')
f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n')
f.write(' std::ostringstream oss;\n')
f.write(' switch (id) {\n')
for name, args in api_map.items():
f.write(' case HIP_API_ID_' + name + ':\n')
f.write(' oss << "' + name + '("')
for ind in range(0, len(args)):
arg_tuple = args[ind]
arg_name = arg_tuple[1]
if ind != 0: f.write(' << ","')
f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name)
f.write('\n << ")";\n')
f.write(' break;\n')
f.write(' default: oss << "unknown";\n')
f.write(' };\n')
f.write(' return strdup(oss.str().c_str());\n')
f.write('};\n')
f.write('#endif\n')
f.write('#endif // _HIP_PROF_STR_H\n');
#############################################################
# main
# Usage
if (len(sys.argv) > 1) and (sys.argv[1] == '-v'):
verbose = 1
sys.argv.pop(1)
if (len(sys.argv) < 3):
fatal ("Usage: " + sys.argv[0] + " [-v] <input HIP API .h file> <patched srcs path>\n" +
" -v - verbose messages\n" +
" example:\n" +
" $ hipap.py hip/include/hip/hcc_detail/hip_runtime_api.h hip/src")
# API header file given as an argument
api_hfile = sys.argv[1]
if not os.path.isfile(api_hfile):
fatal("input file '" + api_hfile + "' not found")
# Srcs directory given as an argument
src_pat = "\.cpp$"
src_dir = sys.argv[2]
if not os.path.isdir(src_dir):
fatal("src directory " + src_dir + "' not found")
if len(sys.argv) > 3: OUTPUT = sys.argv[3]
# API declaration map
api_map = {
'hipHccModuleLaunchKernel': ''
}
# API options map
opts_map = {}
# Parsing API header
parse_api(api_hfile, api_map)
# Parsing sources
parse_src(api_map, src_dir, src_pat, opts_map)
# Checking for non-conformant APIs
for name in opts_map.keys():
m = re.match(r'\.(\S*)', name)
if m:
message("Init missing: " + m.group(1))
del opts_map[name]
# Converting api map to map of lists
# Checking for not found APIs
not_found = 0
if len(opts_map) != 0:
for name in api_map.keys():
args_str = api_map[name];
api_map[name] = list_api_args(args_str)
if not name in opts_map:
fatal("not found: " + name)
not_found += 1
if not_found != 0:
fatal(not_found + " API calls not found")
# Generating output header file
with open(OUTPUT, 'w') as f:
generate_prof_header(f, api_map, opts_map)
# Successfull exit
sys.exit(0)
+14 -9
Просмотреть файл
@@ -12,17 +12,22 @@ set ( LIB_SRC
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
# Generating HSA tracing primitives
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
set ( KFD_LIB "kfdwrapper64" )
set ( KFD_LIB_SRC
${LIB_DIR}/kfd/kfd_wrapper.cpp
)
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
# Generating KFD/Thunk tracing primitives
if ( DEFINED KFD_WRAPPER )
set ( KFD_LIB "kfdwrapper64" )
set ( KFD_LIB_SRC
${LIB_DIR}/kfd/kfd_wrapper.cpp
)
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
endif()
set ( ROCTX_LIB "roctx64" )
set ( ROCTX_LIB_SRC
+102
Просмотреть файл
@@ -0,0 +1,102 @@
/*
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef SRC_CORE_JOURNAL_H_
#define SRC_CORE_JOURNAL_H_
#include <map>
#include <mutex>
namespace roctracer {
template <class Data>
class Journal {
public:
typedef std::mutex mutex_t;
typedef std::map<uint32_t, Data> domain_map_t;
typedef std::map<uint32_t, domain_map_t*> journal_map_t;
struct record_t {
uint32_t domain;
uint32_t op;
Data data;
};
Journal() {
domain_mask_ = 0;
map_ = new journal_map_t;
}
~Journal() {
for (auto& val : map_) delete val.second;
delete map_;
}
void registr(const record_t& record) {
std::lock_guard<mutex_t> lck(mutex_);
auto* map = get_domain_map(record.domain);
map->insert({record.op, record.data});
}
void remove(const record_t& record) {
std::lock_guard<mutex_t> lck(mutex_);
auto* map = get_domain_map(record.domain);
map->erase(record.op);
}
template <class F>
F foreach(const F& f_i) {
std::lock_guard<mutex_t> lck(mutex_);
F f = f_i;
for (uint32_t domain = 0, mask = domain_mask_; mask != 0; ++domain, mask >>= 1) {
if (mask & 1) {
auto map = get_domain_map(domain);
auto begin = map->begin();
auto end = map->end();
for (auto it = begin; it != end; ++it) {
if (f.fun({domain, it->first, it->second}) == false) break;
}
}
}
return f;
}
private:
domain_map_t* get_domain_map(const uint32_t& domain) {
domain_mask_ |= 1u << domain;
auto domain_it = map_->find(domain);
if (domain_it == map_->end()) {
auto* domain_map = new domain_map_t;
auto ret = map_->insert({domain, domain_map});
domain_it = ret.first;
}
return domain_it->second;
}
mutex_t mutex_;
journal_map_t* map_;
uint32_t domain_mask_;
};
} // namespace roctracer
#endif // SRC_CORE_JOURNAL_H_
+26 -25
Просмотреть файл
@@ -29,12 +29,12 @@ class BaseLoader : public T {
return f;
}
static inline loader_t& Instance(const bool& preload = false) {
static inline loader_t& Instance() {
loader_t* obj = instance_.load(std::memory_order_acquire);
if (obj == NULL) {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_.load(std::memory_order_relaxed) == NULL) {
obj = new loader_t(preload);
obj = new loader_t();
instance_.store(obj, std::memory_order_release);
}
}
@@ -45,11 +45,11 @@ class BaseLoader : public T {
static void SetLibName(const char *name) { lib_name_ = name; }
private:
BaseLoader(bool preload) {
const int flags = (preload) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD;
BaseLoader() {
const int flags = RTLD_LAZY;
handle_ = dlopen(lib_name_, flags);
if ((handle_ == NULL) && (strong_ld_check_)) {
fprintf(stderr, "roctracer: Loading '%s' failed, preload(%d), %s\n", lib_name_, (int)preload, dlerror());
if (handle_ == NULL) {
fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror());
abort();
}
dlerror();
@@ -64,7 +64,6 @@ class BaseLoader : public T {
static mutex_t mutex_;
static const char* lib_name_;
static std::atomic<loader_t*> instance_;
static const bool strong_ld_check_;
void* handle_;
};
@@ -99,26 +98,26 @@ class HipApi {
};
// HCC runtime library loader class
#include "inc/roctracer_hcc.h"
class HccApi {
public:
typedef BaseLoader<HccApi> Loader;
typedef decltype(Kalmar::CLAMP::InitActivityCallback) InitActivityCallback_t;
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) EnableActivityCallback_t;
typedef decltype(Kalmar::CLAMP::GetCmdName) GetCmdName_t;
InitActivityCallback_t* InitActivityCallback;
EnableActivityCallback_t* EnableActivityCallback;
GetCmdName_t* GetCmdName;
hipInitAsyncActivityCallback_t* InitActivityCallback;
hipEnableAsyncActivityCallback_t* EnableActivityCallback;
hipGetOpName_t* GetOpName;
protected:
void init(Loader* loader) {
// Kalmar::CLAMP::InitActivityCallback
InitActivityCallback = loader->GetFun<InitActivityCallback_t>("InitActivityCallbackImpl");
// Kalmar::CLAMP::EnableActivityIdCallback
EnableActivityCallback = loader->GetFun<EnableActivityCallback_t>("EnableActivityCallbackImpl");
// Kalmar::CLAMP::GetCmdName
GetCmdName = loader->GetFun<GetCmdName_t>("GetCmdNameImpl");
#if HIP_VDI
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallback");
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallback");
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdName");
#else
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallbackImpl");
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallbackImpl");
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdNameImpl");
#endif
}
};
@@ -141,20 +140,24 @@ class KfdApi {
};
// rocTX runtime library loader class
#include "inc/roctracer_roctx.h"
class RocTxApi {
public:
typedef BaseLoader<RocTxApi> Loader;
typedef bool (RegisterApiCallback_t)(uint32_t op, void* callback, void* arg);
typedef bool (RemoveApiCallback_t)(uint32_t op);
typedef decltype(RegisterApiCallback) RegisterApiCallback_t;
typedef decltype(RemoveApiCallback) RemoveApiCallback_t;
typedef decltype(RangeStackIterate) RangeStackIterate_t;
RegisterApiCallback_t* RegisterApiCallback;
RemoveApiCallback_t* RemoveApiCallback;
RangeStackIterate_t* RangeStackIterate;
protected:
void init(Loader* loader) {
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("RegisterApiCallback");
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("RemoveApiCallback");
RangeStackIterate = loader->GetFun<RangeStackIterate_t>("RangeStackIterate");
}
};
@@ -168,11 +171,9 @@ typedef BaseLoader<RocTxApi> RocTxLoader;
#define LOADER_INSTANTIATE() \
template<class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
template<class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
template<class T> const bool roctracer::BaseLoader<T>::strong_ld_check_ = false;
template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \
template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
template<> const bool roctracer::RocTxLoader::strong_ld_check_ = false;
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so";
#endif // SRC_CORE_LOADER_H_
+210
Просмотреть файл
@@ -0,0 +1,210 @@
/*
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef MEMORY_POOL_H_
#define MEMORY_POOL_H_
#include <pthread.h>
#include <stdlib.h>
#include <atomic>
#include <mutex>
#include "util/exception.h"
#define PTHREAD_CALL(call) \
do { \
int err = call; \
if (err != 0) { \
errno = err; \
perror(#call); \
abort(); \
} \
} while (0)
namespace roctracer {
class MemoryPool {
public:
typedef std::mutex mutex_t;
static void allocator_default(char** ptr, size_t size, void* arg) {
(void)arg;
if (*ptr == NULL) {
*ptr = reinterpret_cast<char*>(malloc(size));
} else if (size != 0) {
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
} else {
free(*ptr);
*ptr = NULL;
}
}
MemoryPool(const roctracer_properties_t& properties) {
// Assigning pool allocator
alloc_fun_ = allocator_default;
alloc_arg_ = NULL;
if (properties.alloc_fun != NULL) {
alloc_fun_ = properties.alloc_fun;
alloc_arg_ = properties.alloc_arg;
}
// Pool definition
buffer_size_ = properties.buffer_size;
const size_t pool_size = 2 * buffer_size_;
pool_begin_ = NULL;
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
pool_end_ = pool_begin_ + pool_size;
buffer_begin_ = pool_begin_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
// Consuming read thread
read_callback_fun_ = properties.buffer_callback_fun;
read_callback_arg_ = properties.buffer_callback_arg;
consumer_arg_.set(this, NULL, NULL, true);
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
}
~MemoryPool() {
Flush();
PTHREAD_CALL(pthread_cancel(consumer_thread_));
void *res;
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
allocator_default(&pool_begin_, 0, alloc_arg_);
}
template <typename Record>
void Write(const Record& record) {
std::lock_guard<mutex_t> lock(write_mutex_);
getRecord<Record>(record);
}
void Flush() {
std::lock_guard<mutex_t> lock(write_mutex_);
if (write_ptr_ > buffer_begin_) {
spawn_reader(buffer_begin_, write_ptr_);
sync_reader(&consumer_arg_);
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
}
}
private:
struct consumer_arg_t {
MemoryPool* obj;
const char* begin;
const char* end;
volatile std::atomic<bool> valid;
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
obj = obj_p;
begin = begin_p;
end = end_p;
valid.store(valid_p);
}
};
template <typename Record>
Record* getRecord(const Record& init) {
char* next = write_ptr_ + sizeof(Record);
if (next > buffer_end_) {
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
spawn_reader(buffer_begin_, write_ptr_);
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
next = write_ptr_ + sizeof(Record);
}
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
write_ptr_ = next;
*ptr = init;
return ptr;
}
static void reset_reader(consumer_arg_t* arg) {
arg->valid.store(false);
}
static void sync_reader(const consumer_arg_t* arg) {
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
}
static void* reader_fun(void* consumer_arg) {
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
roctracer::MemoryPool* obj = arg->obj;
reset_reader(arg);
while (1) {
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
while (arg->valid.load() == false) {
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
}
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
reset_reader(arg);
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
}
return NULL;
}
void spawn_reader(const char* data_begin, const char* data_end) {
sync_reader(&consumer_arg_);
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
consumer_arg_.set(this, data_begin, data_end, true);
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
}
// pool allocator
roctracer_allocator_t alloc_fun_;
void* alloc_arg_;
// Pool definition
size_t buffer_size_;
char* pool_begin_;
char* pool_end_;
char* buffer_begin_;
char* buffer_end_;
char* write_ptr_;
mutex_t write_mutex_;
// Consuming read thread
roctracer_buffer_callback_t read_callback_fun_;
void* read_callback_arg_;
consumer_arg_t consumer_arg_;
pthread_t consumer_thread_;
pthread_mutex_t read_mutex_;
pthread_cond_t read_cond_;
};
} // namespace roctracer
#endif // MEMORY_POOL_H_
+211 -199
Просмотреть файл
@@ -23,21 +23,27 @@ THE SOFTWARE.
#include "inc/roctracer.h"
#include "inc/roctracer_hcc.h"
#include "inc/roctracer_hip.h"
#include "inc/roctracer_ext.h"
#include "inc/roctracer_roctx.h"
#define PROF_API_IMPL 1
#include "inc/roctracer_hsa.h"
#ifdef KFD_WRAPPER
#include "inc/roctracer_kfd.h"
#endif
#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <mutex>
#include <stack>
#include <dirent.h>
#include <string.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include "core/journal.h"
#include "core/loader.h"
#include "core/memory_pool.h"
#include "core/trace_buffer.h"
#include "proxy/tracker.h"
#include "ext/hsa_rt_utils.hpp"
@@ -54,16 +60,6 @@ THE SOFTWARE.
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
#define PTHREAD_CALL(call) \
do { \
int err = call; \
if (err != 0) { \
errno = err; \
perror(#call); \
abort(); \
} \
} while (0)
#define HIPAPI_CALL(call) \
do { \
hipError_t err = call; \
@@ -96,7 +92,6 @@ THE SOFTWARE.
#endif
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
///////////////////////////////////////////////////////////////////////////////////////////////////
// Mark callback
@@ -177,11 +172,52 @@ namespace roctracer {
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t;
typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t;
struct cb_journal_data_t {
roctracer_rtapi_callback_t callback;
void* user_data;
};
typedef Journal<cb_journal_data_t> CbJournal;
CbJournal* cb_journal;
struct act_journal_data_t {
roctracer_pool_t* pool;
};
typedef Journal<act_journal_data_t> ActJournal;
ActJournal* act_journal;
template <class T, class F>
struct journal_functor_t {
typedef typename T::record_t record_t;
F f_;
journal_functor_t(F f) : f_(f) {}
bool fun(const record_t& record) {
f_((activity_domain_t)record.domain, record.op);
return true;
}
};
typedef journal_functor_t<CbJournal, roctracer_enable_op_callback_t> cb_en_functor_t;
typedef journal_functor_t<CbJournal, roctracer_disable_op_callback_t> cb_dis_functor_t;
typedef journal_functor_t<ActJournal, roctracer_enable_op_activity_t> act_en_functor_t;
typedef journal_functor_t<ActJournal, roctracer_disable_op_activity_t> act_dis_functor_t;
template<> bool cb_en_functor_t::fun(const cb_en_functor_t::record_t& record) {
f_((activity_domain_t)record.domain, record.op, record.data.callback, record.data.user_data);
return true;
}
template<> bool act_en_functor_t::fun(const act_en_functor_t::record_t& record) {
f_((activity_domain_t)record.domain, record.op, record.data.pool);
return true;
}
void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry);
void hsa_kernel_handler(::proxy::Tracker::entry_t* entry);
TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
{roctracer::COPY_ENTRY_TYPE, hsa_async_copy_handler},
{roctracer::KERNEL_ENTRY_TYPE, hsa_kernel_handler}
{COPY_ENTRY_TYPE, hsa_async_copy_handler},
{KERNEL_ENTRY_TYPE, hsa_kernel_handler}
};
TraceBuffer<trace_entry_t> trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2);
@@ -199,7 +235,12 @@ CoreApiTable CoreApiTable_saved{};
AmdExtTable AmdExtTable_saved{};
// Table of function pointers to HSA Image Extension
ImageExtTable ImageExtTable_saved{};
}
} // namespace hsa_support
namespace ext_support {
roctracer_start_cb_t roctracer_start_cb = NULL;
roctracer_stop_cb_t roctracer_stop_cb = NULL;
} // namespace ext_suppoprt
roctracer_status_t GetExcStatus(const std::exception& e) {
const util::exception* roctracer_exc_ptr = dynamic_cast<const util::exception*>(&e);
@@ -223,167 +264,19 @@ class GlobalCounter {
GlobalCounter::mutex_t GlobalCounter::mutex_;
GlobalCounter::counter_t GlobalCounter::counter_ = 0;
class MemoryPool {
public:
typedef std::mutex mutex_t;
static void allocator_default(char** ptr, size_t size, void* arg) {
(void)arg;
if (*ptr == NULL) {
*ptr = reinterpret_cast<char*>(malloc(size));
} else if (size != 0) {
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
} else {
free(*ptr);
*ptr = NULL;
}
}
MemoryPool(const roctracer_properties_t& properties) {
// Assigning pool allocator
alloc_fun_ = allocator_default;
alloc_arg_ = NULL;
if (properties.alloc_fun != NULL) {
alloc_fun_ = properties.alloc_fun;
alloc_arg_ = properties.alloc_arg;
}
// Pool definition
buffer_size_ = properties.buffer_size;
const size_t pool_size = 2 * buffer_size_;
pool_begin_ = NULL;
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
pool_end_ = pool_begin_ + pool_size;
buffer_begin_ = pool_begin_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
// Consuming read thread
read_callback_fun_ = properties.buffer_callback_fun;
read_callback_arg_ = properties.buffer_callback_arg;
consumer_arg_.set(this, NULL, NULL, true);
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
}
~MemoryPool() {
Flush();
PTHREAD_CALL(pthread_cancel(consumer_thread_));
void *res;
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
allocator_default(&pool_begin_, 0, alloc_arg_);
}
template <typename Record>
void Write(const Record& record) {
std::lock_guard<mutex_t> lock(write_mutex_);
getRecord<Record>(record);
}
void Flush() {
std::lock_guard<mutex_t> lock(write_mutex_);
if (write_ptr_ > buffer_begin_) {
spawn_reader(buffer_begin_, write_ptr_);
sync_reader(&consumer_arg_);
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
}
}
private:
struct consumer_arg_t {
MemoryPool* obj;
const char* begin;
const char* end;
volatile std::atomic<bool> valid;
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
obj = obj_p;
begin = begin_p;
end = end_p;
valid.store(valid_p);
}
// Records storage
struct roctracer_api_data_t {
union {
hip_api_data_t hip;
};
template <typename Record>
Record* getRecord(const Record& init) {
char* next = write_ptr_ + sizeof(Record);
if (next > buffer_end_) {
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
spawn_reader(buffer_begin_, write_ptr_);
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
next = write_ptr_ + sizeof(Record);
}
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
write_ptr_ = next;
*ptr = init;
return ptr;
}
static void reset_reader(consumer_arg_t* arg) {
arg->valid.store(false);
}
static void sync_reader(const consumer_arg_t* arg) {
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
}
static void* reader_fun(void* consumer_arg) {
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
roctracer::MemoryPool* obj = arg->obj;
reset_reader(arg);
while (1) {
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
while (arg->valid.load() == false) {
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
}
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
reset_reader(arg);
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
}
return NULL;
}
void spawn_reader(const char* data_begin, const char* data_end) {
sync_reader(&consumer_arg_);
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
consumer_arg_.set(this, data_begin, data_end, true);
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
}
// pool allocator
roctracer_allocator_t alloc_fun_;
void* alloc_arg_;
// Pool definition
size_t buffer_size_;
char* pool_begin_;
char* pool_end_;
char* buffer_begin_;
char* buffer_end_;
char* write_ptr_;
mutex_t write_mutex_;
// Consuming read thread
roctracer_buffer_callback_t read_callback_fun_;
void* read_callback_arg_;
consumer_arg_t consumer_arg_;
pthread_t consumer_thread_;
pthread_mutex_t read_mutex_;
pthread_cond_t read_cond_;
roctracer_api_data_t() {};
};
struct record_pair_t {
roctracer_record_t record;
roctracer_api_data_t data;
record_pair_t() {};
};
static thread_local std::stack<record_pair_t> record_pair_stack;
// Correlation id storage
static thread_local activity_correlation_id_t correlation_id_tls = 0;
@@ -407,7 +300,7 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre
return it->second;
}
roctracer_record_t* HIP_SyncActivityCallback(
void* HIP_SyncActivityCallback(
uint32_t op_id,
roctracer_record_t* record,
const void* callback_data,
@@ -416,23 +309,57 @@ roctracer_record_t* HIP_SyncActivityCallback(
static hsa_rt_utils::Timer timer;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
hip_api_data_t* data_ptr = const_cast<hip_api_data_t*>(data);
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL");
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
int phase = ACTIVITY_API_PHASE_ENTER;
if (record != NULL) {
if (data == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback: data is NULL");
phase = data->phase;
} else if (pool != NULL) {
phase = ACTIVITY_API_PHASE_EXIT;
}
if (phase == ACTIVITY_API_PHASE_ENTER) {
// Allocating a record if NULL passed
if (record == NULL) {
if (data != NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: record is NULL");
record_pair_stack.push({});
auto& top = record_pair_stack.top();
record = &(top.record);
data = &(top.data.hip);
data_ptr = const_cast<hip_api_data_t*>(data);
data_ptr->phase = phase;
}
// Filing record info
record->domain = ACTIVITY_DOMAIN_HIP_API;
record->op = op_id;
record->begin_ns = timer.timestamp_ns();
// Correlation ID generating
uint64_t correlation_id = data->correlation_id;
if (correlation_id == 0) {
correlation_id = GlobalCounter::Increment();
const_cast<hip_api_data_t*>(data)->correlation_id = correlation_id;
data_ptr->correlation_id = correlation_id;
}
record->correlation_id = correlation_id;
// Passing correlatin ID
correlation_id_tls = correlation_id;
return record;
return data_ptr;
} else {
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: pool is NULL");
// Getting record of stacked
if (record == NULL) {
if (record_pair_stack.empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty");
auto& top = record_pair_stack.top();
record = &(top.record);
}
// Filing record info
record->end_ns = timer.timestamp_ns();
record->process_id = syscall(__NR_getpid);
record->thread_id = syscall(__NR_gettid);
@@ -446,9 +373,15 @@ roctracer_record_t* HIP_SyncActivityCallback(
pool->Write(ext_record);
}
// Writing record to the buffer
pool->Write(*record);
// popping the record entry
if (!record_pair_stack.empty()) record_pair_stack.pop();
// Clearing correlatin ID
correlation_id_tls = 0;
return NULL;
}
}
@@ -627,17 +560,19 @@ PUBLIC_API const char* roctracer_op_string(
break;
}
case ACTIVITY_DOMAIN_HCC_OPS: {
return roctracer::HccLoader::Instance().GetCmdName(kind);
return roctracer::HccLoader::Instance().GetOpName(kind);
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
return roctracer::HipLoader::Instance().ApiName(op);
break;
}
#if KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
return roctracer::kfd_support::GetApiName(op);
break;
}
#endif
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
@@ -658,11 +593,13 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
if (kind != NULL) *kind = 0;
break;
}
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
*op = roctracer::kfd_support::GetApiCode(str);
if (kind != NULL) *kind = 0;
break;
}
#endif
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
}
@@ -673,9 +610,11 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: return 1;
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER;
case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
#endif
case ACTIVITY_DOMAIN_EXT_API: return 0;
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
default:
@@ -685,18 +624,20 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
}
// Enable runtime API callbacks
static void roctracer_enable_callback_impl(
uint32_t domain,
static roctracer_status_t roctracer_enable_callback_fun(
roctracer_domain_t domain,
uint32_t op,
roctracer_rtapi_callback_t callback,
void* user_data)
{
switch (domain) {
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: {
roctracer::hsa_support::cb_table.set(op, callback, user_data);
@@ -718,6 +659,17 @@ static void roctracer_enable_callback_impl(
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
return ROCTRACER_STATUS_SUCCESS;
}
static void roctracer_enable_callback_impl(
uint32_t domain,
uint32_t op,
roctracer_rtapi_callback_t callback,
void* user_data)
{
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
}
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(
@@ -755,16 +707,18 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback(
}
// Disable runtime API callbacks
static void roctracer_disable_callback_impl(
uint32_t domain,
static roctracer_status_t roctracer_disable_callback_fun(
roctracer_domain_t domain,
uint32_t op)
{
switch (domain) {
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: break;
@@ -783,6 +737,15 @@ static void roctracer_disable_callback_impl(
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
return ROCTRACER_STATUS_SUCCESS;
}
static void roctracer_disable_callback_impl(
uint32_t domain,
uint32_t op)
{
roctracer::cb_journal->remove({domain, op, {}});
roctracer_disable_callback_fun((roctracer_domain_t)domain, op);
}
PUBLIC_API roctracer_status_t roctracer_disable_op_callback(
@@ -850,8 +813,8 @@ PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) {
}
// Enable activity records logging
static void roctracer_enable_activity_impl(
uint32_t domain,
static roctracer_status_t roctracer_enable_activity_fun(
roctracer_domain_t domain,
uint32_t op,
roctracer_pool_t* pool)
{
@@ -883,6 +846,16 @@ static void roctracer_enable_activity_impl(
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
return ROCTRACER_STATUS_SUCCESS;
}
static void roctracer_enable_activity_impl(
uint32_t domain,
uint32_t op,
roctracer_pool_t* pool)
{
roctracer::act_journal->registr({domain, op, {pool}});
roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool);
}
PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
@@ -917,8 +890,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity(
}
// Disable activity records logging
static void roctracer_disable_activity_impl(
uint32_t domain,
static roctracer_status_t roctracer_disable_activity_fun(
roctracer_domain_t domain,
uint32_t op)
{
switch (domain) {
@@ -943,6 +916,15 @@ static void roctracer_disable_activity_impl(
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
return ROCTRACER_STATUS_SUCCESS;
}
static void roctracer_disable_activity_impl(
uint32_t domain,
uint32_t op)
{
roctracer::act_journal->remove({domain, op, {}});
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
}
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(
@@ -1011,10 +993,24 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act
// Mark API
PUBLIC_API void roctracer_mark(const char* str) {
if (mark_api_callback_ptr) {
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
}
if (mark_api_callback_ptr) {
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
}
}
// Start API
PUBLIC_API void roctracer_start() {
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
}
// Stop API
PUBLIC_API void roctracer_stop() {
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
}
// Set properties
@@ -1032,11 +1028,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
roctracer::hsa_support::output_prefix = ops_properties->output_prefix;
#if 0
// HSA dispatches intercepting
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
rocprofiler::ProxyQueue::HsaIntercept(table);
rocprofiler::InterceptQueue::HsaIntercept(table);
#endif
// HSA async-copy tracing
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
@@ -1048,10 +1046,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
break;
}
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
roctracer::kfd_support::intercept_KFDApiTable();
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_API: {
// HSA API properties
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
@@ -1062,9 +1062,19 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
}
case ACTIVITY_DOMAIN_HCC_OPS:
case ACTIVITY_DOMAIN_HIP_API: {
const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB");
if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name);
#ifdef HIP_VDI
const char* hip_lib_name = "libamdhip64.so";
roctracer::HccLoader::SetLibName(hip_lib_name);
roctracer::HipLoader::SetLibName(hip_lib_name);
#endif
mark_api_callback_ptr = reinterpret_cast<mark_api_callback_t*>(properties);
break;
}
case ACTIVITY_DOMAIN_EXT_API: {
roctracer_ext_properties_t* ops_properties = reinterpret_cast<roctracer_ext_properties_t*>(properties);
roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb;
roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb;
break;
}
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
@@ -1113,6 +1123,8 @@ PUBLIC_API void OnUnload() {
CONSTRUCTOR_API void constructor() {
if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); }
roctracer::util::Logger::Create();
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); }
}
+44 -12
Просмотреть файл
@@ -24,11 +24,13 @@ THE SOFTWARE.
#include "inc/roctracer_roctx.h"
#include <string.h>
#include <map>
#include <mutex>
#include <stack>
#include "inc/ext/prof_protocol.h"
#include "util/exception.h"
#include "util/logger.h"
#include <stack>
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
@@ -62,12 +64,8 @@ THE SOFTWARE.
(void)err; \
return X;
static thread_local std::stack<std::string> message_stack;
#if 0
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
#endif
inline uint32_t GetPid() { return syscall(__NR_getpid); }
inline uint32_t GetTid() { return syscall(__NR_gettid); }
////////////////////////////////////////////////////////////////////////////////
// Library errors enumaration
@@ -80,12 +78,27 @@ typedef enum {
// Library implementation
//
namespace roctx {
typedef std::stack<std::string> message_stack_t;
typedef std::map<uint32_t, message_stack_t*> thread_map_t;
typedef std::mutex map_mutex_t;
map_mutex_t map_mutex;
thread_map_t* thread_map = NULL;
static thread_local message_stack_t* message_stack = NULL;
roctx_status_t GetExcStatus(const std::exception& e) {
const roctracer::util::exception* roctx_exc_ptr = dynamic_cast<const roctracer::util::exception*>(&e);
return (roctx_exc_ptr) ? static_cast<roctx_status_t>(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR;
}
void thread_data_init() {
message_stack = new message_stack_t;
const auto tid = GetTid();
std::lock_guard<map_mutex_t> lck(map_mutex);
if (thread_map == NULL) thread_map = new thread_map_t;
(*thread_map)[tid] = message_stack;
}
// callbacks table
extern cb_table_t cb_table;
} // namespace roctx
@@ -119,31 +132,50 @@ PUBLIC_API void roctxMarkA(const char* message) {
PUBLIC_API int roctxRangePushA(const char* message) {
API_METHOD_PREFIX
if (roctx::message_stack == NULL) roctx::thread_data_init();
roctx_api_data_t api_data{};
api_data.args.roctxRangePushA.message = strdup(message);
activity_rtapi_callback_t api_callback_fun = NULL;
void* api_callback_arg = NULL;
roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg);
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg);
message_stack.push(strdup(message));
roctx::message_stack->push(strdup(message));
return roctx::message_stack->size() - 1;
API_METHOD_CATCH(-1);
return message_stack.size()-1;
}
PUBLIC_API int roctxRangePop() {
API_METHOD_PREFIX
if (roctx::message_stack == NULL) roctx::thread_data_init();
roctx_api_data_t api_data{};
activity_rtapi_callback_t api_callback_fun = NULL;
void* api_callback_arg = NULL;
roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg);
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg);
if (message_stack.empty()) {
if (roctx::message_stack->empty()) {
EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!");
} else {
message_stack.pop();
roctx::message_stack->pop();
}
return roctx::message_stack->size();
API_METHOD_CATCH(-1)
return message_stack.size();
}
PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) {
for (const auto& entry : *roctx::thread_map) {
const auto tid = entry.first;
for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){
std::string message = stack.top();
roctx_range_data_t data{};
data.message = message.c_str();
data.tid = tid;
callback(&data, arg);
}
}
}
} // extern "C"
+1 -1
Просмотреть файл
@@ -33,7 +33,7 @@ set ( RUN_SCRIPT "${TEST_DIR}/run.sh" )
add_custom_target( mytest
COMMAND make -C "${TEST_DIR}/MatrixTranspose"
COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test"
COMMAND make -C "${TEST_DIR}/MatrixTranspose_test"
COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test"
COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test"
)
+10
Просмотреть файл
@@ -36,6 +36,10 @@ THE SOFTWARE.
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
// Mark API
extern "C"
void roctracer_mark(const char* str);
// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
@@ -82,6 +86,10 @@ int main() {
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
uint32_t iterations = 100;
while (iterations-- > 0) {
std::cout << "## Iteration (" << iterations << ") #################" << std::endl;
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
@@ -112,6 +120,8 @@ int main() {
printf("PASSED!\n");
}
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
+2 -1
Просмотреть файл
@@ -3,6 +3,7 @@ LIB_PATH = $(ROOT_PATH)/build
ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64
HSA_KMT_INC_PATH ?= /opt/rocm/include
export LD_LIBRARY_PATH=$(LIB_PATH)
HIP_VDI ?= 0
ITERATIONS ?= 100
HIP_PATH?= $(wildcard /opt/rocm/hip)
@@ -24,7 +25,7 @@ EXECUTABLE=./MatrixTranspose
all: clean $(EXECUTABLE)
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS)
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS)
CXX=$(HIPCC)
$(EXECUTABLE): $(OBJECTS)
+19 -11
Просмотреть файл
@@ -35,7 +35,7 @@ THE SOFTWARE.
#include <inc/roctracer_kfd.h>
#ifndef ITERATIONS
# define ITERATIONS 100
# define ITERATIONS 101
#endif
#define WIDTH 1024
@@ -167,9 +167,9 @@ int main() {
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
}
stop_tracing();
}
return errors;
}
@@ -285,7 +285,7 @@ void activity_callback(const char* begin, const char* end, void* arg) {
record->device_id,
record->queue_id
);
if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
fprintf(stdout, " external_id(%lu)",
record->external_id
@@ -302,31 +302,39 @@ void activity_callback(const char* begin, const char* end, void* arg) {
// Init tracing routine
void init_tracing() {
std::cout << "# START #############################" << std::endl << std::flush;
std::cout << "# INIT #############################" << std::endl << std::flush;
// roctracer properties
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL);
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
// Enable HIP API callbacks
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL));
// Enable HIP activity tracing
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
}
// Start tracing routine
void start_tracing() {
std::cout << "# START #############################" << std::endl << std::flush;
// Enable HIP API callbacks
ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL));
// Enable HIP activity tracing
ROCTRACER_CALL(roctracer_enable_activity());
std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush;
// Start
if ((iterations & 1) == 1) roctracer_start();
else roctracer_stop();
}
// Stop tracing routine
void stop_tracing() {
ROCTRACER_CALL(roctracer_disable_callback());
ROCTRACER_CALL(roctracer_disable_activity());
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
ROCTRACER_CALL(roctracer_flush_activity());
std::cout << "# STOP #############################" << std::endl << std::flush;
}
#else
void init_tracing() {}
void start_tracing() {}
void stop_tracing() {}
#endif
+10 -2
Просмотреть файл
@@ -33,6 +33,12 @@ if [ -n "$1" ] ; then
test_filter=$1
fi
# debugger
debugger=""
if [ -n "$2" ] ; then
debugger=$2
fi
# test check routin
test_status=0
test_runnum=0
@@ -46,7 +52,7 @@ eval_test() {
if [ $test_filter = -1 -o $test_filter = $test_number ] ; then
echo "$label: \"$cmdline\""
test_runnum=$((test_runnum + 1))
eval "$cmdline"
eval "$debugger $cmdline"
if [ $? != 0 ] ; then
echo "$label: FAILED"
test_status=$(($test_status + 1))
@@ -59,7 +65,7 @@ eval_test() {
# Standalone test
# rocTrecer is used explicitely by test
eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test"
eval_test "standalone HIP test" "./test/MatrixTranspose_test"
# Tool test
# rocTracer/tool is loaded by HSA runtime
@@ -68,6 +74,8 @@ export ROCTRACER_DOMAIN="hip"
# HIP test
eval_test "tool HIP test" ./test/MatrixTranspose
# with trace sampling control <delay:length:rate>
eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose"
# HSA test
export ROCTRACER_DOMAIN="hsa"
+289 -78
Просмотреть файл
@@ -25,13 +25,21 @@ THE SOFTWARE.
#include <cxxabi.h> /* names denangle */
#include <dirent.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <string.h>
#include <sys/syscall.h> /* SYS_xxx definitions */
#include <sys/types.h>
#include <unistd.h> /* usleep */
#include <inc/roctracer_ext.h>
#include <inc/roctracer_roctx.h>
#include <inc/roctracer_hsa.h>
#include <inc/roctracer_hip.h>
#include <inc/roctracer_hcc.h>
#ifdef KFD_WRAPPER
#include <inc/roctracer_kfd.h>
#endif
#include <inc/ext/hsa_rt_utils.hpp>
#include <src/core/loader.h>
#include <src/core/trace_buffer.h>
@@ -60,14 +68,17 @@ hsa_rt_utils::Timer* timer = NULL;
thread_local timestamp_t hsa_begin_timestamp = 0;
thread_local timestamp_t hip_begin_timestamp = 0;
thread_local timestamp_t kfd_begin_timestamp = 0;
bool trace_roctx = false;
bool trace_hsa_api = false;
bool trace_hsa_activity = false;
bool trace_hip = false;
bool trace_hip_api = false;
bool trace_hip_activity = false;
bool trace_kfd = false;
LOADER_INSTANTIATE();
// Global output file handle
FILE* roctx_file_handle = NULL;
FILE* hsa_api_file_handle = NULL;
FILE* hsa_async_copy_file_handle = NULL;
FILE* hip_api_file_handle = NULL;
@@ -79,6 +90,7 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); }
// Error handler
void fatal(const std::string msg) {
fflush(roctx_file_handle);
fflush(hsa_api_file_handle);
fflush(hsa_async_copy_file_handle);
fflush(hip_api_file_handle);
@@ -90,24 +102,6 @@ void fatal(const std::string msg) {
abort();
}
// KFD API callback function
void kfd_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
kfd_begin_timestamp = timer->timestamp_fn_ns();
} else {
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
std::ostringstream os;
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
}
}
// C++ symbol demangle
static inline const char* cxx_demangle(const char* symbol) {
size_t funcnamesize;
@@ -116,6 +110,110 @@ static inline const char* cxx_demangle(const char* symbol) {
return (ret != NULL) ? ret : symbol;
}
// Tracing control thread
uint32_t control_delay_us = 0;
uint32_t control_len_us = 0;
uint32_t control_dist_us = 0;
void* control_thr_fun(void*) {
const uint32_t delay_sec = control_delay_us / 1000000;
const uint32_t delay_us = control_delay_us % 1000000;
const uint32_t len_sec = control_len_us / 1000000;
const uint32_t len_us = control_len_us % 1000000;
const uint32_t dist_sec = control_dist_us / 1000000;
const uint32_t dist_us = control_dist_us % 1000000;
bool start = true;
sleep(delay_sec);
usleep(delay_us);
while (1) {
if (start) {
start = false;
roctracer_start();
sleep(len_sec);
usleep(len_us);
} else {
start = true;
roctracer_stop();
sleep(dist_sec);
usleep(dist_us);
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// rocTX annotation tracing
struct roctx_trace_entry_t {
uint32_t valid;
uint32_t type;
uint32_t cid;
timestamp_t timestamp;
uint32_t pid;
uint32_t tid;
const char* message;
};
void roctx_flush_cb(roctx_trace_entry_t* entry);
roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}};
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1);
// rocTX callback function
static inline void roctx_callback_fun(
uint32_t domain,
uint32_t cid,
uint32_t tid,
const char* message)
{
const timestamp_t timestamp = timer->timestamp_fn_ns();
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = cid;
entry->timestamp = timestamp;
entry->pid = GetPid();
entry->tid = tid;
entry->message = (message != NULL) ? strdup(message) : NULL;
}
void roctx_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
roctx_callback_fun(domain, cid, GetTid(), data->args.message);
}
// Start/Stop callbacks
void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) {
const bool* is_stop_ptr = (bool*)arg;
const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA;
const char* message = (*is_stop_ptr == true) ? NULL : data->message;
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message);
}
void stop_callback() {
bool is_stop = true;
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
}
void start_callback() {
bool is_stop = false;
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
}
void roctx_flush_cb(roctx_trace_entry_t* entry) {
std::ostringstream os;
os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
if (entry->message != NULL) os << ":\"" << entry->message << "\"";
else os << ":\"\"";
fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HSA API tracing
struct hsa_api_trace_entry_t {
uint32_t valid;
uint32_t type;
@@ -186,6 +284,9 @@ struct hip_api_trace_entry_t {
void* ptr;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP API tracing
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}};
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1);
@@ -221,8 +322,10 @@ void hip_api_callback(
entry->ptr = *(data->args.hipMalloc.ptr);
break;
case HIP_API_ID_hipModuleLaunchKernel:
#if !HIP_VDI
case HIP_API_ID_hipExtModuleLaunchKernel:
case HIP_API_ID_hipHccModuleLaunchKernel:
#endif
const hipFunction_t f = data->args.hipModuleLaunchKernel.f;
if (f != NULL) {
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f));
@@ -289,8 +392,10 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
data->args.hipFree.ptr);
break;
case HIP_API_ID_hipModuleLaunchKernel:
#if !HIP_VDI
case HIP_API_ID_hipExtModuleLaunchKernel:
case HIP_API_ID_hipHccModuleLaunchKernel:
#endif
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n",
oss.str().c_str(),
cxx_demangle(entry->name),
@@ -328,6 +433,32 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// KFD API tracing
// KFD API callback function
#ifdef KFD_WRAPPER
void kfd_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
kfd_begin_timestamp = timer->timestamp_fn_ns();
} else {
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
std::ostringstream os;
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
}
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Input parser
std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) {
const std::string space_chars_set = " \t";
@@ -401,24 +532,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); }
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
// API traces switches
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
if (trace_domain != NULL) {
if (strncmp(trace_domain, "hsa", 3) == 0) {
trace_hsa_api = true;
trace_hsa_activity = true;
}
if (strncmp(trace_domain, "hip", 3) == 0) {
trace_hip = true;
}
if (strncmp(trace_domain, "sys", 3) == 0) {
trace_hsa_api = true;
trace_hip = true;
}
}
trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0);
// Output file
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
if (output_prefix != NULL) {
@@ -431,11 +544,41 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
}
}
// API traces switches
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
if (trace_domain != NULL) {
// ROCTX domain
if (std::string(trace_domain).find("roctx") != std::string::npos) {
trace_roctx = true;
}
// HSA/HIP domains enabling
if (std::string(trace_domain).find("hsa") != std::string::npos) {
trace_hsa_api = true;
trace_hsa_activity = true;
}
if (std::string(trace_domain).find("hip") != std::string::npos) {
trace_hip_api = true;
trace_hip_activity = true;
}
if (std::string(trace_domain).find("sys") != std::string::npos) {
trace_hsa_api = true;
trace_hip_api = true;
trace_hip_activity = true;
}
// KFD domain enabling
if (std::string(trace_domain).find("kfd") != std::string::npos) {
trace_kfd = true;
}
}
// API trace vector
std::vector<std::string> hsa_api_vec;
std::vector<std::string> kfd_api_vec;
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
// XML input
const char* xml_name = getenv("ROCP_INPUT");
if (xml_name != NULL) {
@@ -458,23 +601,28 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
break;
}
if (name == "rocTX") {
found = true;
trace_roctx = true;
}
if (name == "HSA") {
found = true;
trace_hsa_api = true;
hsa_api_vec = api_vec;
}
if (name == "KFD") {
found = true;
trace_kfd = true;
kfd_api_vec = api_vec;
}
if (name == "GPU") {
found = true;
trace_hsa_activity = true;
}
if (name == "HIP") {
found = true;
trace_hip = true;
trace_hip_api = true;
trace_hip_activity = true;
}
if (name == "KFD") {
found = true;
trace_kfd = true;
kfd_api_vec = api_vec;
}
}
@@ -482,7 +630,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
}
printf("\n");
// Enable HSA API callbacks
// Disable HIP activity if HSA activity was set
if (trace_hsa_activity == true) trace_hip_activity = false;
// Enable rpcTX callbacks
if (trace_roctx) {
roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt");
// initialize HSA tracing
roctracer_ext_properties_t properties {
start_callback,
stop_callback
};
roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties);
fprintf(stdout, " rocTX-trace()\n"); fflush(stdout);
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
}
// Enable HSA API callbacks/activity
if (trace_hsa_api) {
hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt");
@@ -504,6 +670,76 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
printf(")\n");
}
// Enable HSA GPU activity
if (trace_hsa_activity) {
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
// initialize HSA tracing
roctracer::hsa_ops_properties_t ops_properties {
table,
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
NULL,
output_prefix
};
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
}
// Enable HIP API callbacks/activity
if (trace_hip_api || trace_hip_activity) {
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
// roctracer properties
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x80000;
properties.buffer_callback_fun = hcc_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
if (trace_hip_api) {
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
}
if (trace_hip_activity) {
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
}
}
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
if (ctrl_str != NULL) {
uint32_t ctrl_delay = 0;
uint32_t ctrl_len = 0;
uint32_t ctrl_rate = 0;
int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
if (ret != 3) {
fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str);
abort();
}
if (ctrl_len > ctrl_rate) {
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
abort();
}
control_dist_us = ctrl_rate - ctrl_len;
control_len_us = ctrl_len;
control_delay_us = ctrl_delay;
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
roctracer_stop();
pthread_t thread;
pthread_attr_t attr;
int err = pthread_attr_init(&attr);
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
}
#ifdef KFD_WRAPPER
// Enable KFD API callbacks/activity
if (trace_kfd) {
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
// initialize KFD tracing
@@ -523,38 +759,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
}
printf(")\n");
}
if (trace_hsa_activity) {
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
// initialize HSA tracing
roctracer::hsa_ops_properties_t ops_properties{
table,
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
NULL,
output_prefix};
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
}
// Enable HIP API callbacks/activity
if (trace_hip) {
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x80000;
properties.buffer_callback_fun = hcc_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
}
#endif
if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); }
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
@@ -570,6 +775,12 @@ void tool_unload(bool destruct) {
is_unloaded = true;
roctracer_unload(destruct);
if (trace_roctx) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
roctx_trace_buffer.Flush();
close_output_file(roctx_file_handle);
}
if (trace_hsa_api) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
@@ -581,7 +792,7 @@ void tool_unload(bool destruct) {
close_output_file(hsa_async_copy_file_handle);
}
if (trace_hip) {
if (trace_hip_api || trace_hip_activity) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));