Merge branch 'amd-master' into rkebichi-patch-kfd-1
[ROCm/roctracer commit: 2bcf1c5554]
Этот коммит содержится в:
@@ -109,6 +109,11 @@ install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so )
|
||||
|
||||
## KFD wrapper
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib )
|
||||
endif ()
|
||||
|
||||
## Packaging directives
|
||||
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
|
||||
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" )
|
||||
|
||||
@@ -17,6 +17,8 @@ if [ -z "$HCC_HOME" ] ; then export HCC_HOME="$ROCM_PATH/hcc"; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
|
||||
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi
|
||||
|
||||
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
|
||||
MAKE_OPTS="-j 8 -C $BUILD_DIR"
|
||||
@@ -27,10 +29,11 @@ pushd $BUILD_DIR
|
||||
cmake \
|
||||
-DCMAKE_MODULE_PATH=$ROCTRACER_ROOT/cmake_modules \
|
||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
|
||||
-DCMAKE_PREFIX_PATH="$PACKAGE_ROOT" \
|
||||
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
|
||||
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
|
||||
-DCPACK_GENERATOR="DEB;RPM" \
|
||||
$HIP_VDI_OPT \
|
||||
$ROCTRACER_ROOT
|
||||
make
|
||||
make mytest
|
||||
|
||||
@@ -43,6 +43,18 @@ if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
|
||||
add_definitions ( -DDEBUG_TRACE=1 )
|
||||
endif()
|
||||
|
||||
## Enable HIP_VDI mode
|
||||
if ( DEFINED HIP_VDI )
|
||||
add_definitions ( -DHIP_VDI=${HIP_VDI} )
|
||||
else()
|
||||
set ( HIP_VDI 0 )
|
||||
endif()
|
||||
|
||||
## Enable KFD wrapper
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} )
|
||||
endif()
|
||||
|
||||
## Enable HIP/HCC local build
|
||||
if ( DEFINED LOCAL_BUILD )
|
||||
add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} )
|
||||
@@ -123,5 +135,7 @@ message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" )
|
||||
message ( "-------------HCC-Inc: ${HCC_INC_DIR}" )
|
||||
message ( "-------------HIP-Inc: ${HIP_INC_DIR}" )
|
||||
message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" )
|
||||
message ( "-------------HIP-VDI: ${HIP_VDI}" )
|
||||
message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" )
|
||||
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
|
||||
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
|
||||
|
||||
@@ -31,6 +31,7 @@ typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
@@ -83,7 +84,7 @@ struct activity_record_t {
|
||||
};
|
||||
|
||||
// Activity sync calback type
|
||||
typedef activity_record_t* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
|
||||
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
|
||||
// Activity async calback type
|
||||
typedef void (*activity_id_callback_t)(activity_correlation_id_t id);
|
||||
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
|
||||
|
||||
@@ -34,13 +34,25 @@ THE SOFTWARE.
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
typedef void (*roctracer_start_cb_t)();
|
||||
typedef void (*roctracer_stop_cb_t)();
|
||||
typedef struct {
|
||||
roctracer_start_cb_t start_cb;
|
||||
roctracer_stop_cb_t stop_cb;
|
||||
} roctracer_ext_properties_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Mark API
|
||||
void roctracer_mark(const char* str);
|
||||
// Application annotatin API
|
||||
|
||||
// Tracing start API
|
||||
void roctracer_start();
|
||||
|
||||
// Tracing stop API
|
||||
void roctracer_stop();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// External correlation id API
|
||||
|
||||
@@ -23,11 +23,26 @@ THE SOFTWARE.
|
||||
#ifndef INC_ROCTRACER_HCC_H_
|
||||
#define INC_ROCTRACER_HCC_H_
|
||||
|
||||
#if HIP_VDI
|
||||
#define HIP_OP_ID_NUMBER 3
|
||||
#define HIP_OP_ID_COPY 1
|
||||
extern "C" {
|
||||
typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
|
||||
typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
|
||||
typedef const char* (hipGetOpName_t)(unsigned op);
|
||||
}
|
||||
#else // !HIP_VDI
|
||||
#if LOCAL_BUILD
|
||||
#include <hc_prof_runtime.h>
|
||||
#else
|
||||
#include <hcc/hc_prof_runtime.h>
|
||||
#endif
|
||||
#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER
|
||||
#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY
|
||||
typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t;
|
||||
#endif // !HIP_VDI
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
|
||||
@@ -77,6 +77,14 @@ bool RegisterApiCallback(uint32_t op, void* callback, void* arg);
|
||||
// Remove ROCTX callback for given opertaion id
|
||||
bool RemoveApiCallback(uint32_t op);
|
||||
|
||||
// Iterate range stack to support tracing start/stop
|
||||
typedef struct {
|
||||
const char* message;
|
||||
uint32_t tid;
|
||||
} roctx_range_data_t;
|
||||
typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg);
|
||||
void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -1,454 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
import os, sys, re
|
||||
|
||||
verbose = 0
|
||||
PROF_HEADER = "hip_prof_str.h"
|
||||
OUTPUT = PROF_HEADER
|
||||
REC_MAX_LEN = 1024
|
||||
|
||||
# Fatal error termination
|
||||
inp_file = 'none'
|
||||
line_num = -1
|
||||
def fatal(msg):
|
||||
if line_num != -1:
|
||||
print >>sys.stderr, "Error: " + msg + ", file '" + inp_file + "', line (" + str(line_num) + ")"
|
||||
else:
|
||||
print >>sys.stderr, "Error: " + msg
|
||||
sys.exit(1)
|
||||
|
||||
# Verbose message
|
||||
def message(msg):
|
||||
if verbose: print >>sys.stdout, msg
|
||||
|
||||
#############################################################
|
||||
# Normalizing API arguments
|
||||
def filtr_api_args(args_str):
|
||||
args_str = re.sub(r'^\s*', r'', args_str);
|
||||
args_str = re.sub(r'\s*$', r'', args_str);
|
||||
args_str = re.sub(r'\s*,\s*', r',', args_str);
|
||||
args_str = re.sub(r'\s+', r' ', args_str);
|
||||
args_str = re.sub(r'void \*', r'void* ', args_str);
|
||||
args_str = re.sub(r'(enum|struct) ', '', args_str);
|
||||
return args_str
|
||||
|
||||
# Normalizing types
|
||||
def norm_api_types(type_str):
|
||||
type_str = re.sub(r'uint32_t', r'unsigned int', type_str)
|
||||
type_str = re.sub(r'^unsigned$', r'unsigned int', type_str)
|
||||
return type_str
|
||||
|
||||
# Creating a list of arguments [(type, name), ...]
|
||||
def list_api_args(args_str):
|
||||
args_str = filtr_api_args(args_str)
|
||||
args_list = []
|
||||
if args_str != '':
|
||||
for arg_pair in args_str.split(','):
|
||||
if arg_pair == 'void': continue
|
||||
arg_pair = re.sub(r'\s*=\s*\S+$','', arg_pair);
|
||||
m = re.match("^(.*)\s(\S+)$", arg_pair);
|
||||
if m:
|
||||
arg_type = norm_api_types(m.group(1))
|
||||
arg_name = m.group(2)
|
||||
args_list.append((arg_type, arg_name))
|
||||
else:
|
||||
fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'")
|
||||
return args_list;
|
||||
|
||||
# Creating arguments string "type0, type1, ..."
|
||||
def filtr_api_types(args_str):
|
||||
args_list = list_api_args(args_str)
|
||||
types_str = ''
|
||||
for arg_tuple in args_list:
|
||||
types_str += arg_tuple[0] + ', '
|
||||
return types_str
|
||||
|
||||
# Creating options list [opt0, opt1, ...]
|
||||
def filtr_api_opts(args_str):
|
||||
args_list = list_api_args(args_str)
|
||||
opts_list = []
|
||||
for arg_tuple in args_list:
|
||||
opts_list.append(arg_tuple[1])
|
||||
return opts_list
|
||||
#############################################################
|
||||
# Parsing API header
|
||||
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
|
||||
def parse_api(inp_file_p, out):
|
||||
global inp_file
|
||||
global line_num
|
||||
inp_file = inp_file_p
|
||||
|
||||
beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\(");
|
||||
api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)");
|
||||
end_pattern = re.compile("Texture");
|
||||
hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)')
|
||||
nms_open_pattern = re.compile(r'namespace hip_impl {')
|
||||
nms_close_pattern = re.compile(r'}')
|
||||
|
||||
inp = open(inp_file, 'r')
|
||||
|
||||
found = 0
|
||||
hidden = 0
|
||||
nms_level = 0;
|
||||
record = ""
|
||||
line_num = -1
|
||||
|
||||
for line in inp.readlines():
|
||||
record += re.sub(r'^\s+', r' ', line[:-1])
|
||||
line_num += 1
|
||||
|
||||
if len(record) > REC_MAX_LEN:
|
||||
fatal("bad record \"" + record + "\"")
|
||||
|
||||
if beg_pattern.match(record) and (hidden == 0) and (nms_level == 0): found = 1
|
||||
|
||||
if found != 0:
|
||||
record = re.sub("\s__dparm\([^\)]*\)", '', record);
|
||||
m = api_pattern.match(record)
|
||||
if m:
|
||||
found = 0
|
||||
if end_pattern.search(record): break
|
||||
out[m.group(2)] = m.group(3)
|
||||
else: continue
|
||||
|
||||
hidden = 0
|
||||
if hidden_pattern.match(line): hidden = 1
|
||||
|
||||
if nms_open_pattern.match(line): nms_level += 1
|
||||
if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1
|
||||
if nms_level < 0:
|
||||
fatal("nms level < 0")
|
||||
|
||||
record = ""
|
||||
|
||||
inp.close()
|
||||
line_num = -1
|
||||
#############################################################
|
||||
# Patching API implementation
|
||||
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
|
||||
# HIP_INIT_CB(hipSetupArgument, arg, size, offset);
|
||||
# inp_file - input implementation source file
|
||||
# api_map - input public API map [<api name>] => <api args>
|
||||
# out - output map [<api name>] => [opt0, opt1, ...]
|
||||
def parse_content(inp_file_p, api_map, out):
|
||||
global inp_file
|
||||
global line_num
|
||||
inp_file = inp_file_p
|
||||
|
||||
# API definition begin pattern
|
||||
beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\(");
|
||||
# API definition complete pattern
|
||||
api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{");
|
||||
# API init macro pattern
|
||||
init_pattern = re.compile("^\s*HIP_INIT[_\w]*_API\(([^,]+)(,|\))");
|
||||
target_pattern = re.compile("^(\s*HIP_INIT[^\(]*)(_API\()(.*)\);\s*$");
|
||||
|
||||
# Open input file
|
||||
inp = open(inp_file, 'r')
|
||||
|
||||
# API name
|
||||
api_name = ""
|
||||
# Valid public API found flag
|
||||
api_valid = 0
|
||||
|
||||
# Input file patched content
|
||||
content = ''
|
||||
# Sub content for found API defiition
|
||||
sub_content = ''
|
||||
# Current record, accumulating several API definition related lines
|
||||
record = ''
|
||||
# Current input file line number
|
||||
line_num = -1
|
||||
# API beginning found flag
|
||||
found = 0
|
||||
|
||||
# Reading input file
|
||||
for line in inp.readlines():
|
||||
# Accumulating record
|
||||
record += re.sub(r'^\s+', r' ', line[:-1])
|
||||
line_num += 1
|
||||
|
||||
if len(record) > REC_MAX_LEN:
|
||||
fatal("bad record \"" + record + "\"")
|
||||
break;
|
||||
|
||||
# Looking for API begin
|
||||
if beg_pattern.match(record): found = 1
|
||||
|
||||
# Matching complete API definition
|
||||
if found == 1:
|
||||
record = re.sub("\s__dparm\([^\)]*\)", '', record);
|
||||
m = api_pattern.match(record)
|
||||
# Checking if complete API matched
|
||||
if m:
|
||||
found = 2
|
||||
api_name = m.group(2);
|
||||
# Checking if API name is in the API map
|
||||
if api_name in api_map:
|
||||
# Getting API arguments
|
||||
api_args = m.group(3)
|
||||
# Getting etalon arguments from the API map
|
||||
eta_args = api_map[api_name]
|
||||
if eta_args == '':
|
||||
eta_args = api_args
|
||||
api_map[api_name] = eta_args
|
||||
# Normalizing API arguments
|
||||
api_types = filtr_api_types(api_args)
|
||||
# Normalizing etalon arguments
|
||||
eta_types = filtr_api_types(eta_args)
|
||||
if api_types == eta_types:
|
||||
# API is already found
|
||||
if api_name in out:
|
||||
fatal("API redefined \"" + api_name + "\", record \"" + record + "\"")
|
||||
# Set valid public API found flag
|
||||
api_valid = 1
|
||||
# Set output API map with API arguments list
|
||||
out[api_name] = filtr_api_opts(api_args)
|
||||
else:
|
||||
# Warning about mismatched API, possible non public overloaded version
|
||||
api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types
|
||||
message("\t" + api_name + ':\n' + api_diff + '\n')
|
||||
|
||||
# API found action
|
||||
if found == 2:
|
||||
# Looking for INIT macro
|
||||
m = init_pattern.match(line)
|
||||
if m:
|
||||
found = 0
|
||||
if api_valid == 1:
|
||||
api_valid = 0
|
||||
message("\t" + api_name)
|
||||
else:
|
||||
# Registering dummy API for non public API if the name in INIT is not NONE
|
||||
init_name = m.group(1)
|
||||
# Ignore if it is initialized as NONE
|
||||
if init_name != 'NONE':
|
||||
# Check if init name matching API name
|
||||
if init_name != api_name:
|
||||
fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'")
|
||||
# If init name is not in public API map then it is private API
|
||||
# else it was not identified and will be checked on finish
|
||||
if not init_name in api_map:
|
||||
if init_name in out:
|
||||
fatal("API reinit \"" + api_name + "\", record \"" + record + "\"")
|
||||
out[init_name] = []
|
||||
elif re.search('}', line):
|
||||
found = 0
|
||||
# Expect INIT macro for valid public API
|
||||
if api_valid == 1:
|
||||
api_valid = 0
|
||||
if api_name in out:
|
||||
del out[api_name]
|
||||
del api_map[api_name]
|
||||
out['.' + api_name] = 1
|
||||
else:
|
||||
fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"")
|
||||
|
||||
if found != 1: record = ""
|
||||
content += line
|
||||
|
||||
inp.close()
|
||||
line_num = -1
|
||||
|
||||
if len(out) != 0:
|
||||
return content
|
||||
else:
|
||||
return ''
|
||||
|
||||
# src path walk
|
||||
def parse_src(api_map, src_path, src_patt, out):
|
||||
pattern = re.compile(src_patt)
|
||||
src_path = re.sub(r'\s', '', src_path)
|
||||
for src_dir in src_path.split(':'):
|
||||
message("Parsing " + src_dir + " for '" + src_patt + "'")
|
||||
for root, dirs, files in os.walk(src_dir):
|
||||
for fnm in files:
|
||||
if pattern.search(fnm):
|
||||
file = root + '/' + fnm
|
||||
message(file)
|
||||
content = parse_content(file, api_map, out);
|
||||
if content != '':
|
||||
f = open(file, 'w')
|
||||
f.write(content)
|
||||
f.close()
|
||||
#############################################################
|
||||
# Generating profiling primitives header
|
||||
# api_map - public API map [<api name>] => [(type, name), ...]
|
||||
# opts_map - opts map [<api name>] => [opt0, opt1, ...]
|
||||
def generate_prof_header(f, api_map, opts_map):
|
||||
# Private API list
|
||||
priv_lst = []
|
||||
|
||||
f.write('// automatically generated sources\n')
|
||||
f.write('#ifndef _HIP_PROF_STR_H\n');
|
||||
f.write('#define _HIP_PROF_STR_H\n');
|
||||
f.write('#include <sstream>\n');
|
||||
f.write('#include <string>\n');
|
||||
|
||||
# Generating dummy macro for non-public API
|
||||
f.write('\n// Dummy API primitives\n')
|
||||
f.write('#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n')
|
||||
for name in opts_map:
|
||||
if not name in api_map:
|
||||
opts_lst = opts_map[name]
|
||||
if len(opts_lst) != 0:
|
||||
fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst))
|
||||
f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n')
|
||||
priv_lst.append(name)
|
||||
|
||||
for name in priv_lst:
|
||||
message("Private: " + name)
|
||||
|
||||
# Generating the callbacks ID enumaration
|
||||
f.write('\n// HIP API callbacks ID enumaration\n')
|
||||
f.write('enum hip_api_id_t {\n')
|
||||
cb_id = 0
|
||||
for name in api_map.keys():
|
||||
f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n')
|
||||
cb_id += 1
|
||||
f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n')
|
||||
f.write(' HIP_API_ID_ANY = ' + str(cb_id + 1) + ',\n')
|
||||
f.write('\n')
|
||||
f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n')
|
||||
for name in priv_lst:
|
||||
f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NUMBER,\n')
|
||||
f.write('};\n')
|
||||
|
||||
# Generating the callbacks ID enumaration
|
||||
f.write('\n// Return HIP API string\n')
|
||||
f.write('static const char* hip_api_name(const uint32_t& id) {\n')
|
||||
f.write(' switch(id) {\n')
|
||||
for name in api_map.keys():
|
||||
f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n')
|
||||
f.write(' };\n')
|
||||
f.write(' return "unknown";\n')
|
||||
f.write('};\n')
|
||||
|
||||
# Generating the callbacks data structure
|
||||
f.write('\n// HIP API callbacks data structure\n')
|
||||
f.write(
|
||||
'struct hip_api_data_t {\n' +
|
||||
' uint64_t correlation_id;\n' +
|
||||
' uint32_t phase;\n' +
|
||||
' union {\n'
|
||||
)
|
||||
for name, args in api_map.items():
|
||||
if len(args) != 0:
|
||||
f.write(' struct {\n')
|
||||
for arg_tuple in args:
|
||||
f.write(' ' + arg_tuple[0] + ' ' + arg_tuple[1] + ';\n')
|
||||
f.write(' } ' + name + ';\n')
|
||||
f.write(
|
||||
' } args;\n' +
|
||||
'};\n'
|
||||
)
|
||||
|
||||
# Generating the callbacks args data filling macros
|
||||
f.write('\n// HIP API callbacks args data filling macros\n')
|
||||
for name, args in api_map.items():
|
||||
f.write('// ' + name + str(args) + '\n')
|
||||
f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n')
|
||||
if name in opts_map:
|
||||
opts_list = opts_map[name]
|
||||
if len(args) != len(opts_list):
|
||||
fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list))
|
||||
# API args iterating:
|
||||
# type is args[<ind>][0]
|
||||
# name is args[<ind>][1]
|
||||
for ind in range(0, len(args)):
|
||||
arg_tuple = args[ind]
|
||||
fld_name = arg_tuple[1]
|
||||
arg_name = opts_list[ind]
|
||||
f.write(' cb_data.args.' + name + '.' + fld_name + ' = ' + arg_name + '; \\\n')
|
||||
f.write('};\n')
|
||||
f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n')
|
||||
|
||||
# Generating the method for the API string, name and parameters
|
||||
f.write('\n')
|
||||
f.write('#if 0\n')
|
||||
f.write('// HIP API string method, method name and parameters\n')
|
||||
f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n')
|
||||
f.write(' std::ostringstream oss;\n')
|
||||
f.write(' switch (id) {\n')
|
||||
for name, args in api_map.items():
|
||||
f.write(' case HIP_API_ID_' + name + ':\n')
|
||||
f.write(' oss << "' + name + '("')
|
||||
for ind in range(0, len(args)):
|
||||
arg_tuple = args[ind]
|
||||
arg_name = arg_tuple[1]
|
||||
if ind != 0: f.write(' << ","')
|
||||
f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name)
|
||||
f.write('\n << ")";\n')
|
||||
f.write(' break;\n')
|
||||
f.write(' default: oss << "unknown";\n')
|
||||
f.write(' };\n')
|
||||
f.write(' return strdup(oss.str().c_str());\n')
|
||||
f.write('};\n')
|
||||
f.write('#endif\n')
|
||||
|
||||
f.write('#endif // _HIP_PROF_STR_H\n');
|
||||
|
||||
#############################################################
|
||||
# main
|
||||
# Usage
|
||||
if (len(sys.argv) > 1) and (sys.argv[1] == '-v'):
|
||||
verbose = 1
|
||||
sys.argv.pop(1)
|
||||
|
||||
if (len(sys.argv) < 3):
|
||||
fatal ("Usage: " + sys.argv[0] + " [-v] <input HIP API .h file> <patched srcs path>\n" +
|
||||
" -v - verbose messages\n" +
|
||||
" example:\n" +
|
||||
" $ hipap.py hip/include/hip/hcc_detail/hip_runtime_api.h hip/src")
|
||||
|
||||
# API header file given as an argument
|
||||
api_hfile = sys.argv[1]
|
||||
if not os.path.isfile(api_hfile):
|
||||
fatal("input file '" + api_hfile + "' not found")
|
||||
|
||||
# Srcs directory given as an argument
|
||||
src_pat = "\.cpp$"
|
||||
src_dir = sys.argv[2]
|
||||
if not os.path.isdir(src_dir):
|
||||
fatal("src directory " + src_dir + "' not found")
|
||||
|
||||
if len(sys.argv) > 3: OUTPUT = sys.argv[3]
|
||||
|
||||
# API declaration map
|
||||
api_map = {
|
||||
'hipHccModuleLaunchKernel': ''
|
||||
}
|
||||
# API options map
|
||||
opts_map = {}
|
||||
|
||||
# Parsing API header
|
||||
parse_api(api_hfile, api_map)
|
||||
|
||||
# Parsing sources
|
||||
parse_src(api_map, src_dir, src_pat, opts_map)
|
||||
|
||||
# Checking for non-conformant APIs
|
||||
for name in opts_map.keys():
|
||||
m = re.match(r'\.(\S*)', name)
|
||||
if m:
|
||||
message("Init missing: " + m.group(1))
|
||||
del opts_map[name]
|
||||
|
||||
# Converting api map to map of lists
|
||||
# Checking for not found APIs
|
||||
not_found = 0
|
||||
if len(opts_map) != 0:
|
||||
for name in api_map.keys():
|
||||
args_str = api_map[name];
|
||||
api_map[name] = list_api_args(args_str)
|
||||
if not name in opts_map:
|
||||
fatal("not found: " + name)
|
||||
not_found += 1
|
||||
if not_found != 0:
|
||||
fatal(not_found + " API calls not found")
|
||||
|
||||
# Generating output header file
|
||||
with open(OUTPUT, 'w') as f:
|
||||
generate_prof_header(f, api_map, opts_map)
|
||||
|
||||
# Successfull exit
|
||||
sys.exit(0)
|
||||
@@ -12,17 +12,22 @@ set ( LIB_SRC
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
|
||||
|
||||
# Generating HSA tracing primitives
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
|
||||
|
||||
set ( KFD_LIB "kfdwrapper64" )
|
||||
set ( KFD_LIB_SRC
|
||||
${LIB_DIR}/kfd/kfd_wrapper.cpp
|
||||
)
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
|
||||
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
|
||||
# Generating KFD/Thunk tracing primitives
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
set ( KFD_LIB "kfdwrapper64" )
|
||||
set ( KFD_LIB_SRC
|
||||
${LIB_DIR}/kfd/kfd_wrapper.cpp
|
||||
)
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
|
||||
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
|
||||
endif()
|
||||
|
||||
set ( ROCTX_LIB "roctx64" )
|
||||
set ( ROCTX_LIB_SRC
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SRC_CORE_JOURNAL_H_
|
||||
#define SRC_CORE_JOURNAL_H_
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
template <class Data>
|
||||
class Journal {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef std::map<uint32_t, Data> domain_map_t;
|
||||
typedef std::map<uint32_t, domain_map_t*> journal_map_t;
|
||||
|
||||
struct record_t {
|
||||
uint32_t domain;
|
||||
uint32_t op;
|
||||
Data data;
|
||||
};
|
||||
|
||||
Journal() {
|
||||
domain_mask_ = 0;
|
||||
map_ = new journal_map_t;
|
||||
}
|
||||
|
||||
~Journal() {
|
||||
for (auto& val : map_) delete val.second;
|
||||
delete map_;
|
||||
}
|
||||
|
||||
void registr(const record_t& record) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
auto* map = get_domain_map(record.domain);
|
||||
map->insert({record.op, record.data});
|
||||
}
|
||||
|
||||
void remove(const record_t& record) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
auto* map = get_domain_map(record.domain);
|
||||
map->erase(record.op);
|
||||
}
|
||||
|
||||
template <class F>
|
||||
F foreach(const F& f_i) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
F f = f_i;
|
||||
for (uint32_t domain = 0, mask = domain_mask_; mask != 0; ++domain, mask >>= 1) {
|
||||
if (mask & 1) {
|
||||
auto map = get_domain_map(domain);
|
||||
auto begin = map->begin();
|
||||
auto end = map->end();
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
if (f.fun({domain, it->first, it->second}) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
private:
|
||||
domain_map_t* get_domain_map(const uint32_t& domain) {
|
||||
domain_mask_ |= 1u << domain;
|
||||
auto domain_it = map_->find(domain);
|
||||
if (domain_it == map_->end()) {
|
||||
auto* domain_map = new domain_map_t;
|
||||
auto ret = map_->insert({domain, domain_map});
|
||||
domain_it = ret.first;
|
||||
}
|
||||
return domain_it->second;
|
||||
}
|
||||
|
||||
mutex_t mutex_;
|
||||
journal_map_t* map_;
|
||||
uint32_t domain_mask_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // SRC_CORE_JOURNAL_H_
|
||||
@@ -29,12 +29,12 @@ class BaseLoader : public T {
|
||||
return f;
|
||||
}
|
||||
|
||||
static inline loader_t& Instance(const bool& preload = false) {
|
||||
static inline loader_t& Instance() {
|
||||
loader_t* obj = instance_.load(std::memory_order_acquire);
|
||||
if (obj == NULL) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_.load(std::memory_order_relaxed) == NULL) {
|
||||
obj = new loader_t(preload);
|
||||
obj = new loader_t();
|
||||
instance_.store(obj, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
@@ -45,11 +45,11 @@ class BaseLoader : public T {
|
||||
static void SetLibName(const char *name) { lib_name_ = name; }
|
||||
|
||||
private:
|
||||
BaseLoader(bool preload) {
|
||||
const int flags = (preload) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD;
|
||||
BaseLoader() {
|
||||
const int flags = RTLD_LAZY;
|
||||
handle_ = dlopen(lib_name_, flags);
|
||||
if ((handle_ == NULL) && (strong_ld_check_)) {
|
||||
fprintf(stderr, "roctracer: Loading '%s' failed, preload(%d), %s\n", lib_name_, (int)preload, dlerror());
|
||||
if (handle_ == NULL) {
|
||||
fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror());
|
||||
abort();
|
||||
}
|
||||
dlerror();
|
||||
@@ -64,7 +64,6 @@ class BaseLoader : public T {
|
||||
static mutex_t mutex_;
|
||||
static const char* lib_name_;
|
||||
static std::atomic<loader_t*> instance_;
|
||||
static const bool strong_ld_check_;
|
||||
void* handle_;
|
||||
};
|
||||
|
||||
@@ -99,26 +98,26 @@ class HipApi {
|
||||
};
|
||||
|
||||
// HCC runtime library loader class
|
||||
#include "inc/roctracer_hcc.h"
|
||||
class HccApi {
|
||||
public:
|
||||
typedef BaseLoader<HccApi> Loader;
|
||||
|
||||
typedef decltype(Kalmar::CLAMP::InitActivityCallback) InitActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) EnableActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::GetCmdName) GetCmdName_t;
|
||||
|
||||
InitActivityCallback_t* InitActivityCallback;
|
||||
EnableActivityCallback_t* EnableActivityCallback;
|
||||
GetCmdName_t* GetCmdName;
|
||||
hipInitAsyncActivityCallback_t* InitActivityCallback;
|
||||
hipEnableAsyncActivityCallback_t* EnableActivityCallback;
|
||||
hipGetOpName_t* GetOpName;
|
||||
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
// Kalmar::CLAMP::InitActivityCallback
|
||||
InitActivityCallback = loader->GetFun<InitActivityCallback_t>("InitActivityCallbackImpl");
|
||||
// Kalmar::CLAMP::EnableActivityIdCallback
|
||||
EnableActivityCallback = loader->GetFun<EnableActivityCallback_t>("EnableActivityCallbackImpl");
|
||||
// Kalmar::CLAMP::GetCmdName
|
||||
GetCmdName = loader->GetFun<GetCmdName_t>("GetCmdNameImpl");
|
||||
#if HIP_VDI
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallback");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallback");
|
||||
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdName");
|
||||
#else
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallbackImpl");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallbackImpl");
|
||||
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdNameImpl");
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
@@ -141,20 +140,24 @@ class KfdApi {
|
||||
};
|
||||
|
||||
// rocTX runtime library loader class
|
||||
#include "inc/roctracer_roctx.h"
|
||||
class RocTxApi {
|
||||
public:
|
||||
typedef BaseLoader<RocTxApi> Loader;
|
||||
|
||||
typedef bool (RegisterApiCallback_t)(uint32_t op, void* callback, void* arg);
|
||||
typedef bool (RemoveApiCallback_t)(uint32_t op);
|
||||
typedef decltype(RegisterApiCallback) RegisterApiCallback_t;
|
||||
typedef decltype(RemoveApiCallback) RemoveApiCallback_t;
|
||||
typedef decltype(RangeStackIterate) RangeStackIterate_t;
|
||||
|
||||
RegisterApiCallback_t* RegisterApiCallback;
|
||||
RemoveApiCallback_t* RemoveApiCallback;
|
||||
RangeStackIterate_t* RangeStackIterate;
|
||||
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("RegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("RemoveApiCallback");
|
||||
RangeStackIterate = loader->GetFun<RangeStackIterate_t>("RangeStackIterate");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -168,11 +171,9 @@ typedef BaseLoader<RocTxApi> RocTxLoader;
|
||||
#define LOADER_INSTANTIATE() \
|
||||
template<class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
|
||||
template<class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
|
||||
template<class T> const bool roctracer::BaseLoader<T>::strong_ld_check_ = false;
|
||||
template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \
|
||||
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \
|
||||
template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \
|
||||
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
|
||||
template<> const bool roctracer::RocTxLoader::strong_ld_check_ = false;
|
||||
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so";
|
||||
|
||||
#endif // SRC_CORE_LOADER_H_
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MEMORY_POOL_H_
|
||||
#define MEMORY_POOL_H_
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include "util/exception.h"
|
||||
|
||||
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
errno = err; \
|
||||
perror(#call); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static void allocator_default(char** ptr, size_t size, void* arg) {
|
||||
(void)arg;
|
||||
if (*ptr == NULL) {
|
||||
*ptr = reinterpret_cast<char*>(malloc(size));
|
||||
} else if (size != 0) {
|
||||
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
*ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
// Assigning pool allocator
|
||||
alloc_fun_ = allocator_default;
|
||||
alloc_arg_ = NULL;
|
||||
if (properties.alloc_fun != NULL) {
|
||||
alloc_fun_ = properties.alloc_fun;
|
||||
alloc_arg_ = properties.alloc_arg;
|
||||
}
|
||||
|
||||
// Pool definition
|
||||
buffer_size_ = properties.buffer_size;
|
||||
const size_t pool_size = 2 * buffer_size_;
|
||||
pool_begin_ = NULL;
|
||||
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
|
||||
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
|
||||
pool_end_ = pool_begin_ + pool_size;
|
||||
buffer_begin_ = pool_begin_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
|
||||
// Consuming read thread
|
||||
read_callback_fun_ = properties.buffer_callback_fun;
|
||||
read_callback_arg_ = properties.buffer_callback_arg;
|
||||
consumer_arg_.set(this, NULL, NULL, true);
|
||||
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
|
||||
}
|
||||
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
PTHREAD_CALL(pthread_cancel(consumer_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
allocator_default(&pool_begin_, 0, alloc_arg_);
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
getRecord<Record>(record);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
if (write_ptr_ > buffer_begin_) {
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
sync_reader(&consumer_arg_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
const char* begin;
|
||||
const char* end;
|
||||
volatile std::atomic<bool> valid;
|
||||
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
|
||||
obj = obj_p;
|
||||
begin = begin_p;
|
||||
end = end_p;
|
||||
valid.store(valid_p);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Record>
|
||||
Record* getRecord(const Record& init) {
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
next = write_ptr_ + sizeof(Record);
|
||||
}
|
||||
|
||||
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
|
||||
write_ptr_ = next;
|
||||
|
||||
*ptr = init;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void reset_reader(consumer_arg_t* arg) {
|
||||
arg->valid.store(false);
|
||||
}
|
||||
|
||||
static void sync_reader(const consumer_arg_t* arg) {
|
||||
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
|
||||
}
|
||||
|
||||
static void* reader_fun(void* consumer_arg) {
|
||||
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
|
||||
roctracer::MemoryPool* obj = arg->obj;
|
||||
|
||||
reset_reader(arg);
|
||||
|
||||
while (1) {
|
||||
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
|
||||
while (arg->valid.load() == false) {
|
||||
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
|
||||
reset_reader(arg);
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void spawn_reader(const char* data_begin, const char* data_end) {
|
||||
sync_reader(&consumer_arg_);
|
||||
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
|
||||
consumer_arg_.set(this, data_begin, data_end, true);
|
||||
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
|
||||
}
|
||||
|
||||
// pool allocator
|
||||
roctracer_allocator_t alloc_fun_;
|
||||
void* alloc_arg_;
|
||||
|
||||
// Pool definition
|
||||
size_t buffer_size_;
|
||||
char* pool_begin_;
|
||||
char* pool_end_;
|
||||
char* buffer_begin_;
|
||||
char* buffer_end_;
|
||||
char* write_ptr_;
|
||||
mutex_t write_mutex_;
|
||||
|
||||
// Consuming read thread
|
||||
roctracer_buffer_callback_t read_callback_fun_;
|
||||
void* read_callback_arg_;
|
||||
consumer_arg_t consumer_arg_;
|
||||
pthread_t consumer_thread_;
|
||||
pthread_mutex_t read_mutex_;
|
||||
pthread_cond_t read_cond_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // MEMORY_POOL_H_
|
||||
@@ -23,21 +23,27 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer.h"
|
||||
#include "inc/roctracer_hcc.h"
|
||||
#include "inc/roctracer_hip.h"
|
||||
#include "inc/roctracer_ext.h"
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#define PROF_API_IMPL 1
|
||||
#include "inc/roctracer_hsa.h"
|
||||
#ifdef KFD_WRAPPER
|
||||
#include "inc/roctracer_kfd.h"
|
||||
#endif
|
||||
|
||||
#include <dirent.h>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "core/journal.h"
|
||||
#include "core/loader.h"
|
||||
#include "core/memory_pool.h"
|
||||
#include "core/trace_buffer.h"
|
||||
#include "proxy/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
@@ -54,16 +60,6 @@ THE SOFTWARE.
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
errno = err; \
|
||||
perror(#call); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define HIPAPI_CALL(call) \
|
||||
do { \
|
||||
hipError_t err = call; \
|
||||
@@ -96,7 +92,6 @@ THE SOFTWARE.
|
||||
#endif
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mark callback
|
||||
@@ -177,11 +172,52 @@ namespace roctracer {
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
|
||||
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
|
||||
typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t;
|
||||
typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t;
|
||||
|
||||
struct cb_journal_data_t {
|
||||
roctracer_rtapi_callback_t callback;
|
||||
void* user_data;
|
||||
};
|
||||
typedef Journal<cb_journal_data_t> CbJournal;
|
||||
CbJournal* cb_journal;
|
||||
|
||||
struct act_journal_data_t {
|
||||
roctracer_pool_t* pool;
|
||||
};
|
||||
typedef Journal<act_journal_data_t> ActJournal;
|
||||
ActJournal* act_journal;
|
||||
|
||||
template <class T, class F>
|
||||
struct journal_functor_t {
|
||||
typedef typename T::record_t record_t;
|
||||
F f_;
|
||||
journal_functor_t(F f) : f_(f) {}
|
||||
bool fun(const record_t& record) {
|
||||
f_((activity_domain_t)record.domain, record.op);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
typedef journal_functor_t<CbJournal, roctracer_enable_op_callback_t> cb_en_functor_t;
|
||||
typedef journal_functor_t<CbJournal, roctracer_disable_op_callback_t> cb_dis_functor_t;
|
||||
typedef journal_functor_t<ActJournal, roctracer_enable_op_activity_t> act_en_functor_t;
|
||||
typedef journal_functor_t<ActJournal, roctracer_disable_op_activity_t> act_dis_functor_t;
|
||||
template<> bool cb_en_functor_t::fun(const cb_en_functor_t::record_t& record) {
|
||||
f_((activity_domain_t)record.domain, record.op, record.data.callback, record.data.user_data);
|
||||
return true;
|
||||
}
|
||||
template<> bool act_en_functor_t::fun(const act_en_functor_t::record_t& record) {
|
||||
f_((activity_domain_t)record.domain, record.op, record.data.pool);
|
||||
return true;
|
||||
}
|
||||
|
||||
void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry);
|
||||
void hsa_kernel_handler(::proxy::Tracker::entry_t* entry);
|
||||
TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
|
||||
{roctracer::COPY_ENTRY_TYPE, hsa_async_copy_handler},
|
||||
{roctracer::KERNEL_ENTRY_TYPE, hsa_kernel_handler}
|
||||
{COPY_ENTRY_TYPE, hsa_async_copy_handler},
|
||||
{KERNEL_ENTRY_TYPE, hsa_kernel_handler}
|
||||
};
|
||||
TraceBuffer<trace_entry_t> trace_buffer("HSA GPU", 0x200000, trace_buffer_prm, 2);
|
||||
|
||||
@@ -199,7 +235,12 @@ CoreApiTable CoreApiTable_saved{};
|
||||
AmdExtTable AmdExtTable_saved{};
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable ImageExtTable_saved{};
|
||||
}
|
||||
} // namespace hsa_support
|
||||
|
||||
namespace ext_support {
|
||||
roctracer_start_cb_t roctracer_start_cb = NULL;
|
||||
roctracer_stop_cb_t roctracer_stop_cb = NULL;
|
||||
} // namespace ext_suppoprt
|
||||
|
||||
roctracer_status_t GetExcStatus(const std::exception& e) {
|
||||
const util::exception* roctracer_exc_ptr = dynamic_cast<const util::exception*>(&e);
|
||||
@@ -223,167 +264,19 @@ class GlobalCounter {
|
||||
GlobalCounter::mutex_t GlobalCounter::mutex_;
|
||||
GlobalCounter::counter_t GlobalCounter::counter_ = 0;
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static void allocator_default(char** ptr, size_t size, void* arg) {
|
||||
(void)arg;
|
||||
if (*ptr == NULL) {
|
||||
*ptr = reinterpret_cast<char*>(malloc(size));
|
||||
} else if (size != 0) {
|
||||
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
*ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
// Assigning pool allocator
|
||||
alloc_fun_ = allocator_default;
|
||||
alloc_arg_ = NULL;
|
||||
if (properties.alloc_fun != NULL) {
|
||||
alloc_fun_ = properties.alloc_fun;
|
||||
alloc_arg_ = properties.alloc_arg;
|
||||
}
|
||||
|
||||
// Pool definition
|
||||
buffer_size_ = properties.buffer_size;
|
||||
const size_t pool_size = 2 * buffer_size_;
|
||||
pool_begin_ = NULL;
|
||||
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
|
||||
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
|
||||
pool_end_ = pool_begin_ + pool_size;
|
||||
buffer_begin_ = pool_begin_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
|
||||
// Consuming read thread
|
||||
read_callback_fun_ = properties.buffer_callback_fun;
|
||||
read_callback_arg_ = properties.buffer_callback_arg;
|
||||
consumer_arg_.set(this, NULL, NULL, true);
|
||||
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
|
||||
}
|
||||
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
PTHREAD_CALL(pthread_cancel(consumer_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
allocator_default(&pool_begin_, 0, alloc_arg_);
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
getRecord<Record>(record);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
if (write_ptr_ > buffer_begin_) {
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
sync_reader(&consumer_arg_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
const char* begin;
|
||||
const char* end;
|
||||
volatile std::atomic<bool> valid;
|
||||
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
|
||||
obj = obj_p;
|
||||
begin = begin_p;
|
||||
end = end_p;
|
||||
valid.store(valid_p);
|
||||
}
|
||||
// Records storage
|
||||
struct roctracer_api_data_t {
|
||||
union {
|
||||
hip_api_data_t hip;
|
||||
};
|
||||
|
||||
template <typename Record>
|
||||
Record* getRecord(const Record& init) {
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
next = write_ptr_ + sizeof(Record);
|
||||
}
|
||||
|
||||
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
|
||||
write_ptr_ = next;
|
||||
|
||||
*ptr = init;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void reset_reader(consumer_arg_t* arg) {
|
||||
arg->valid.store(false);
|
||||
}
|
||||
|
||||
static void sync_reader(const consumer_arg_t* arg) {
|
||||
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
|
||||
}
|
||||
|
||||
static void* reader_fun(void* consumer_arg) {
|
||||
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
|
||||
roctracer::MemoryPool* obj = arg->obj;
|
||||
|
||||
reset_reader(arg);
|
||||
|
||||
while (1) {
|
||||
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
|
||||
while (arg->valid.load() == false) {
|
||||
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
|
||||
reset_reader(arg);
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void spawn_reader(const char* data_begin, const char* data_end) {
|
||||
sync_reader(&consumer_arg_);
|
||||
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
|
||||
consumer_arg_.set(this, data_begin, data_end, true);
|
||||
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
|
||||
}
|
||||
|
||||
// pool allocator
|
||||
roctracer_allocator_t alloc_fun_;
|
||||
void* alloc_arg_;
|
||||
|
||||
// Pool definition
|
||||
size_t buffer_size_;
|
||||
char* pool_begin_;
|
||||
char* pool_end_;
|
||||
char* buffer_begin_;
|
||||
char* buffer_end_;
|
||||
char* write_ptr_;
|
||||
mutex_t write_mutex_;
|
||||
|
||||
// Consuming read thread
|
||||
roctracer_buffer_callback_t read_callback_fun_;
|
||||
void* read_callback_arg_;
|
||||
consumer_arg_t consumer_arg_;
|
||||
pthread_t consumer_thread_;
|
||||
pthread_mutex_t read_mutex_;
|
||||
pthread_cond_t read_cond_;
|
||||
roctracer_api_data_t() {};
|
||||
};
|
||||
struct record_pair_t {
|
||||
roctracer_record_t record;
|
||||
roctracer_api_data_t data;
|
||||
record_pair_t() {};
|
||||
};
|
||||
static thread_local std::stack<record_pair_t> record_pair_stack;
|
||||
|
||||
// Correlation id storage
|
||||
static thread_local activity_correlation_id_t correlation_id_tls = 0;
|
||||
@@ -407,7 +300,7 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre
|
||||
return it->second;
|
||||
}
|
||||
|
||||
roctracer_record_t* HIP_SyncActivityCallback(
|
||||
void* HIP_SyncActivityCallback(
|
||||
uint32_t op_id,
|
||||
roctracer_record_t* record,
|
||||
const void* callback_data,
|
||||
@@ -416,23 +309,57 @@ roctracer_record_t* HIP_SyncActivityCallback(
|
||||
static hsa_rt_utils::Timer timer;
|
||||
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
hip_api_data_t* data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL");
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
|
||||
int phase = ACTIVITY_API_PHASE_ENTER;
|
||||
if (record != NULL) {
|
||||
if (data == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback: data is NULL");
|
||||
phase = data->phase;
|
||||
} else if (pool != NULL) {
|
||||
phase = ACTIVITY_API_PHASE_EXIT;
|
||||
}
|
||||
|
||||
if (phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
// Allocating a record if NULL passed
|
||||
if (record == NULL) {
|
||||
if (data != NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback enter: record is NULL");
|
||||
record_pair_stack.push({});
|
||||
auto& top = record_pair_stack.top();
|
||||
record = &(top.record);
|
||||
data = &(top.data.hip);
|
||||
data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
data_ptr->phase = phase;
|
||||
}
|
||||
|
||||
// Filing record info
|
||||
record->domain = ACTIVITY_DOMAIN_HIP_API;
|
||||
record->op = op_id;
|
||||
record->begin_ns = timer.timestamp_ns();
|
||||
|
||||
// Correlation ID generating
|
||||
uint64_t correlation_id = data->correlation_id;
|
||||
if (correlation_id == 0) {
|
||||
correlation_id = GlobalCounter::Increment();
|
||||
const_cast<hip_api_data_t*>(data)->correlation_id = correlation_id;
|
||||
data_ptr->correlation_id = correlation_id;
|
||||
}
|
||||
record->correlation_id = correlation_id;
|
||||
|
||||
// Passing correlatin ID
|
||||
correlation_id_tls = correlation_id;
|
||||
return record;
|
||||
|
||||
return data_ptr;
|
||||
} else {
|
||||
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: pool is NULL");
|
||||
|
||||
// Getting record of stacked
|
||||
if (record == NULL) {
|
||||
if (record_pair_stack.empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty");
|
||||
auto& top = record_pair_stack.top();
|
||||
record = &(top.record);
|
||||
}
|
||||
|
||||
// Filing record info
|
||||
record->end_ns = timer.timestamp_ns();
|
||||
record->process_id = syscall(__NR_getpid);
|
||||
record->thread_id = syscall(__NR_gettid);
|
||||
@@ -446,9 +373,15 @@ roctracer_record_t* HIP_SyncActivityCallback(
|
||||
pool->Write(ext_record);
|
||||
}
|
||||
|
||||
// Writing record to the buffer
|
||||
pool->Write(*record);
|
||||
|
||||
// popping the record entry
|
||||
if (!record_pair_stack.empty()) record_pair_stack.pop();
|
||||
|
||||
// Clearing correlatin ID
|
||||
correlation_id_tls = 0;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@@ -627,17 +560,19 @@ PUBLIC_API const char* roctracer_op_string(
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
return roctracer::HccLoader::Instance().GetCmdName(kind);
|
||||
return roctracer::HccLoader::Instance().GetOpName(kind);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
return roctracer::HipLoader::Instance().ApiName(op);
|
||||
break;
|
||||
}
|
||||
#if KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
return roctracer::kfd_support::GetApiName(op);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -658,11 +593,13 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
*op = roctracer::kfd_support::GetApiCode(str);
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -673,9 +610,11 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: return 1;
|
||||
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_EXT_API: return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
|
||||
default:
|
||||
@@ -685,18 +624,20 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
|
||||
}
|
||||
|
||||
// Enable runtime API callbacks
|
||||
static void roctracer_enable_callback_impl(
|
||||
uint32_t domain,
|
||||
static roctracer_status_t roctracer_enable_callback_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
switch (domain) {
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
roctracer::hsa_support::cb_table.set(op, callback, user_data);
|
||||
@@ -718,6 +659,17 @@ static void roctracer_enable_callback_impl(
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_enable_callback_impl(
|
||||
uint32_t domain,
|
||||
uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
|
||||
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(
|
||||
@@ -755,16 +707,18 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback(
|
||||
}
|
||||
|
||||
// Disable runtime API callbacks
|
||||
static void roctracer_disable_callback_impl(
|
||||
uint32_t domain,
|
||||
static roctracer_status_t roctracer_disable_callback_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
@@ -783,6 +737,15 @@ static void roctracer_disable_callback_impl(
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_disable_callback_impl(
|
||||
uint32_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
roctracer::cb_journal->remove({domain, op, {}});
|
||||
roctracer_disable_callback_fun((roctracer_domain_t)domain, op);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_callback(
|
||||
@@ -850,8 +813,8 @@ PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) {
|
||||
}
|
||||
|
||||
// Enable activity records logging
|
||||
static void roctracer_enable_activity_impl(
|
||||
uint32_t domain,
|
||||
static roctracer_status_t roctracer_enable_activity_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
@@ -883,6 +846,16 @@ static void roctracer_enable_activity_impl(
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_enable_activity_impl(
|
||||
uint32_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
roctracer::act_journal->registr({domain, op, {pool}});
|
||||
roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
|
||||
@@ -917,8 +890,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity(
|
||||
}
|
||||
|
||||
// Disable activity records logging
|
||||
static void roctracer_disable_activity_impl(
|
||||
uint32_t domain,
|
||||
static roctracer_status_t roctracer_disable_activity_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
@@ -943,6 +916,15 @@ static void roctracer_disable_activity_impl(
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_disable_activity_impl(
|
||||
uint32_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
roctracer::act_journal->remove({domain, op, {}});
|
||||
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(
|
||||
@@ -1011,10 +993,24 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act
|
||||
|
||||
// Mark API
|
||||
PUBLIC_API void roctracer_mark(const char* str) {
|
||||
if (mark_api_callback_ptr) {
|
||||
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
|
||||
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
|
||||
}
|
||||
if (mark_api_callback_ptr) {
|
||||
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
|
||||
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
|
||||
}
|
||||
}
|
||||
|
||||
// Start API
|
||||
PUBLIC_API void roctracer_start() {
|
||||
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
|
||||
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
|
||||
}
|
||||
|
||||
// Stop API
|
||||
PUBLIC_API void roctracer_stop() {
|
||||
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
|
||||
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
|
||||
}
|
||||
|
||||
// Set properties
|
||||
@@ -1032,11 +1028,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
|
||||
roctracer::hsa_support::output_prefix = ops_properties->output_prefix;
|
||||
|
||||
#if 0
|
||||
// HSA dispatches intercepting
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
#endif
|
||||
|
||||
// HSA async-copy tracing
|
||||
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
|
||||
@@ -1048,10 +1046,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
|
||||
break;
|
||||
}
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
roctracer::kfd_support::intercept_KFDApiTable();
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
// HSA API properties
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
|
||||
@@ -1062,9 +1062,19 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB");
|
||||
if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name);
|
||||
#ifdef HIP_VDI
|
||||
const char* hip_lib_name = "libamdhip64.so";
|
||||
roctracer::HccLoader::SetLibName(hip_lib_name);
|
||||
roctracer::HipLoader::SetLibName(hip_lib_name);
|
||||
#endif
|
||||
mark_api_callback_ptr = reinterpret_cast<mark_api_callback_t*>(properties);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_EXT_API: {
|
||||
roctracer_ext_properties_t* ops_properties = reinterpret_cast<roctracer_ext_properties_t*>(properties);
|
||||
roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb;
|
||||
roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
@@ -1113,6 +1123,8 @@ PUBLIC_API void OnUnload() {
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); }
|
||||
roctracer::util::Logger::Create();
|
||||
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
|
||||
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
|
||||
if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); }
|
||||
}
|
||||
|
||||
|
||||
@@ -24,11 +24,13 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer_roctx.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
|
||||
#include "inc/ext/prof_protocol.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
#include <stack>
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
@@ -62,12 +64,8 @@ THE SOFTWARE.
|
||||
(void)err; \
|
||||
return X;
|
||||
|
||||
static thread_local std::stack<std::string> message_stack;
|
||||
|
||||
#if 0
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
#endif
|
||||
inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Library errors enumaration
|
||||
@@ -80,12 +78,27 @@ typedef enum {
|
||||
// Library implementation
|
||||
//
|
||||
namespace roctx {
|
||||
typedef std::stack<std::string> message_stack_t;
|
||||
typedef std::map<uint32_t, message_stack_t*> thread_map_t;
|
||||
typedef std::mutex map_mutex_t;
|
||||
map_mutex_t map_mutex;
|
||||
thread_map_t* thread_map = NULL;
|
||||
static thread_local message_stack_t* message_stack = NULL;
|
||||
|
||||
roctx_status_t GetExcStatus(const std::exception& e) {
|
||||
const roctracer::util::exception* roctx_exc_ptr = dynamic_cast<const roctracer::util::exception*>(&e);
|
||||
return (roctx_exc_ptr) ? static_cast<roctx_status_t>(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR;
|
||||
}
|
||||
|
||||
void thread_data_init() {
|
||||
message_stack = new message_stack_t;
|
||||
const auto tid = GetTid();
|
||||
|
||||
std::lock_guard<map_mutex_t> lck(map_mutex);
|
||||
if (thread_map == NULL) thread_map = new thread_map_t;
|
||||
(*thread_map)[tid] = message_stack;
|
||||
}
|
||||
|
||||
// callbacks table
|
||||
extern cb_table_t cb_table;
|
||||
} // namespace roctx
|
||||
@@ -119,31 +132,50 @@ PUBLIC_API void roctxMarkA(const char* message) {
|
||||
|
||||
PUBLIC_API int roctxRangePushA(const char* message) {
|
||||
API_METHOD_PREFIX
|
||||
if (roctx::message_stack == NULL) roctx::thread_data_init();
|
||||
|
||||
roctx_api_data_t api_data{};
|
||||
api_data.args.roctxRangePushA.message = strdup(message);
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg);
|
||||
message_stack.push(strdup(message));
|
||||
roctx::message_stack->push(strdup(message));
|
||||
|
||||
return roctx::message_stack->size() - 1;
|
||||
API_METHOD_CATCH(-1);
|
||||
return message_stack.size()-1;
|
||||
}
|
||||
|
||||
PUBLIC_API int roctxRangePop() {
|
||||
API_METHOD_PREFIX
|
||||
if (roctx::message_stack == NULL) roctx::thread_data_init();
|
||||
|
||||
roctx_api_data_t api_data{};
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg);
|
||||
if (message_stack.empty()) {
|
||||
if (roctx::message_stack->empty()) {
|
||||
EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!");
|
||||
} else {
|
||||
message_stack.pop();
|
||||
roctx::message_stack->pop();
|
||||
}
|
||||
|
||||
return roctx::message_stack->size();
|
||||
API_METHOD_CATCH(-1)
|
||||
return message_stack.size();
|
||||
}
|
||||
|
||||
PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) {
|
||||
for (const auto& entry : *roctx::thread_map) {
|
||||
const auto tid = entry.first;
|
||||
for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){
|
||||
std::string message = stack.top();
|
||||
roctx_range_data_t data{};
|
||||
data.message = message.c_str();
|
||||
data.tid = tid;
|
||||
callback(&data, arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -33,7 +33,7 @@ set ( RUN_SCRIPT "${TEST_DIR}/run.sh" )
|
||||
add_custom_target( mytest
|
||||
COMMAND make -C "${TEST_DIR}/MatrixTranspose"
|
||||
COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose/MatrixTranspose ${PROJECT_BINARY_DIR}/test"
|
||||
COMMAND make -C "${TEST_DIR}/MatrixTranspose_test"
|
||||
COMMAND HIP_VDI=${HIP_VDI} make -C "${TEST_DIR}/MatrixTranspose_test"
|
||||
COMMAND sh -xc "cp ${TEST_DIR}/MatrixTranspose_test/MatrixTranspose ${PROJECT_BINARY_DIR}/test/MatrixTranspose_test"
|
||||
)
|
||||
|
||||
|
||||
@@ -36,6 +36,10 @@ THE SOFTWARE.
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Mark API
|
||||
extern "C"
|
||||
void roctracer_mark(const char* str);
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
@@ -82,6 +86,10 @@ int main() {
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
uint32_t iterations = 100;
|
||||
while (iterations-- > 0) {
|
||||
std::cout << "## Iteration (" << iterations << ") #################" << std::endl;
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
@@ -112,6 +120,8 @@ int main() {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
@@ -3,6 +3,7 @@ LIB_PATH = $(ROOT_PATH)/build
|
||||
ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64
|
||||
HSA_KMT_INC_PATH ?= /opt/rocm/include
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
HIP_VDI ?= 0
|
||||
ITERATIONS ?= 100
|
||||
|
||||
HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
@@ -24,7 +25,7 @@ EXECUTABLE=./MatrixTranspose
|
||||
|
||||
all: clean $(EXECUTABLE)
|
||||
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DITERATIONS=$(ITERATIONS)
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS)
|
||||
CXX=$(HIPCC)
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
#include <inc/roctracer_kfd.h>
|
||||
|
||||
#ifndef ITERATIONS
|
||||
# define ITERATIONS 100
|
||||
# define ITERATIONS 101
|
||||
#endif
|
||||
#define WIDTH 1024
|
||||
|
||||
@@ -167,9 +167,9 @@ int main() {
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
}
|
||||
|
||||
stop_tracing();
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
@@ -285,7 +285,7 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
record->device_id,
|
||||
record->queue_id
|
||||
);
|
||||
if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
|
||||
fprintf(stdout, " external_id(%lu)",
|
||||
record->external_id
|
||||
@@ -302,31 +302,39 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
|
||||
// Init tracing routine
|
||||
void init_tracing() {
|
||||
std::cout << "# START #############################" << std::endl << std::flush;
|
||||
std::cout << "# INIT #############################" << std::endl << std::flush;
|
||||
// roctracer properties
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
// Enable HIP API callbacks
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL));
|
||||
// Enable HIP activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
}
|
||||
|
||||
// Start tracing routine
|
||||
void start_tracing() {
|
||||
std::cout << "# START #############################" << std::endl << std::flush;
|
||||
// Enable HIP API callbacks
|
||||
ROCTRACER_CALL(roctracer_enable_callback(api_callback, NULL));
|
||||
// Enable HIP activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_activity());
|
||||
std::cout << "# START (" << iterations << ") #############################" << std::endl << std::flush;
|
||||
// Start
|
||||
if ((iterations & 1) == 1) roctracer_start();
|
||||
else roctracer_stop();
|
||||
}
|
||||
|
||||
// Stop tracing routine
|
||||
void stop_tracing() {
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
std::cout << "# STOP #############################" << std::endl << std::flush;
|
||||
}
|
||||
#else
|
||||
void init_tracing() {}
|
||||
void start_tracing() {}
|
||||
void stop_tracing() {}
|
||||
#endif
|
||||
|
||||
@@ -33,6 +33,12 @@ if [ -n "$1" ] ; then
|
||||
test_filter=$1
|
||||
fi
|
||||
|
||||
# debugger
|
||||
debugger=""
|
||||
if [ -n "$2" ] ; then
|
||||
debugger=$2
|
||||
fi
|
||||
|
||||
# test check routin
|
||||
test_status=0
|
||||
test_runnum=0
|
||||
@@ -46,7 +52,7 @@ eval_test() {
|
||||
if [ $test_filter = -1 -o $test_filter = $test_number ] ; then
|
||||
echo "$label: \"$cmdline\""
|
||||
test_runnum=$((test_runnum + 1))
|
||||
eval "$cmdline"
|
||||
eval "$debugger $cmdline"
|
||||
if [ $? != 0 ] ; then
|
||||
echo "$label: FAILED"
|
||||
test_status=$(($test_status + 1))
|
||||
@@ -59,7 +65,7 @@ eval_test() {
|
||||
|
||||
# Standalone test
|
||||
# rocTrecer is used explicitely by test
|
||||
eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test"
|
||||
eval_test "standalone HIP test" "./test/MatrixTranspose_test"
|
||||
|
||||
# Tool test
|
||||
# rocTracer/tool is loaded by HSA runtime
|
||||
@@ -68,6 +74,8 @@ export ROCTRACER_DOMAIN="hip"
|
||||
|
||||
# HIP test
|
||||
eval_test "tool HIP test" ./test/MatrixTranspose
|
||||
# with trace sampling control <delay:length:rate>
|
||||
eval_test "tool HIP period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose"
|
||||
|
||||
# HSA test
|
||||
export ROCTRACER_DOMAIN="hsa"
|
||||
|
||||
@@ -25,13 +25,21 @@ THE SOFTWARE.
|
||||
|
||||
#include <cxxabi.h> /* names denangle */
|
||||
#include <dirent.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h> /* SYS_xxx definitions */
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h> /* usleep */
|
||||
|
||||
#include <inc/roctracer_ext.h>
|
||||
#include <inc/roctracer_roctx.h>
|
||||
#include <inc/roctracer_hsa.h>
|
||||
#include <inc/roctracer_hip.h>
|
||||
#include <inc/roctracer_hcc.h>
|
||||
#ifdef KFD_WRAPPER
|
||||
#include <inc/roctracer_kfd.h>
|
||||
#endif
|
||||
#include <inc/ext/hsa_rt_utils.hpp>
|
||||
#include <src/core/loader.h>
|
||||
#include <src/core/trace_buffer.h>
|
||||
@@ -60,14 +68,17 @@ hsa_rt_utils::Timer* timer = NULL;
|
||||
thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
thread_local timestamp_t hip_begin_timestamp = 0;
|
||||
thread_local timestamp_t kfd_begin_timestamp = 0;
|
||||
bool trace_roctx = false;
|
||||
bool trace_hsa_api = false;
|
||||
bool trace_hsa_activity = false;
|
||||
bool trace_hip = false;
|
||||
bool trace_hip_api = false;
|
||||
bool trace_hip_activity = false;
|
||||
bool trace_kfd = false;
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
|
||||
// Global output file handle
|
||||
FILE* roctx_file_handle = NULL;
|
||||
FILE* hsa_api_file_handle = NULL;
|
||||
FILE* hsa_async_copy_file_handle = NULL;
|
||||
FILE* hip_api_file_handle = NULL;
|
||||
@@ -79,6 +90,7 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
// Error handler
|
||||
void fatal(const std::string msg) {
|
||||
fflush(roctx_file_handle);
|
||||
fflush(hsa_api_file_handle);
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
fflush(hip_api_file_handle);
|
||||
@@ -90,24 +102,6 @@ void fatal(const std::string msg) {
|
||||
abort();
|
||||
}
|
||||
|
||||
// KFD API callback function
|
||||
void kfd_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
kfd_begin_timestamp = timer->timestamp_fn_ns();
|
||||
} else {
|
||||
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
|
||||
std::ostringstream os;
|
||||
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
|
||||
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
|
||||
}
|
||||
}
|
||||
// C++ symbol demangle
|
||||
static inline const char* cxx_demangle(const char* symbol) {
|
||||
size_t funcnamesize;
|
||||
@@ -116,6 +110,110 @@ static inline const char* cxx_demangle(const char* symbol) {
|
||||
return (ret != NULL) ? ret : symbol;
|
||||
}
|
||||
|
||||
// Tracing control thread
|
||||
uint32_t control_delay_us = 0;
|
||||
uint32_t control_len_us = 0;
|
||||
uint32_t control_dist_us = 0;
|
||||
void* control_thr_fun(void*) {
|
||||
const uint32_t delay_sec = control_delay_us / 1000000;
|
||||
const uint32_t delay_us = control_delay_us % 1000000;
|
||||
const uint32_t len_sec = control_len_us / 1000000;
|
||||
const uint32_t len_us = control_len_us % 1000000;
|
||||
const uint32_t dist_sec = control_dist_us / 1000000;
|
||||
const uint32_t dist_us = control_dist_us % 1000000;
|
||||
bool start = true;
|
||||
|
||||
sleep(delay_sec);
|
||||
usleep(delay_us);
|
||||
|
||||
while (1) {
|
||||
if (start) {
|
||||
start = false;
|
||||
roctracer_start();
|
||||
sleep(len_sec);
|
||||
usleep(len_us);
|
||||
} else {
|
||||
start = true;
|
||||
roctracer_stop();
|
||||
sleep(dist_sec);
|
||||
usleep(dist_us);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// rocTX annotation tracing
|
||||
|
||||
struct roctx_trace_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t type;
|
||||
uint32_t cid;
|
||||
timestamp_t timestamp;
|
||||
uint32_t pid;
|
||||
uint32_t tid;
|
||||
const char* message;
|
||||
};
|
||||
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry);
|
||||
roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm[1] = {{0, roctx_flush_cb}};
|
||||
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1);
|
||||
|
||||
// rocTX callback function
|
||||
static inline void roctx_callback_fun(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
uint32_t tid,
|
||||
const char* message)
|
||||
{
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
|
||||
entry->valid = roctracer::TRACE_ENTRY_COMPL;
|
||||
entry->type = 0;
|
||||
entry->cid = cid;
|
||||
entry->timestamp = timestamp;
|
||||
entry->pid = GetPid();
|
||||
entry->tid = tid;
|
||||
entry->message = (message != NULL) ? strdup(message) : NULL;
|
||||
}
|
||||
|
||||
void roctx_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
roctx_callback_fun(domain, cid, GetTid(), data->args.message);
|
||||
}
|
||||
|
||||
// Start/Stop callbacks
|
||||
void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) {
|
||||
const bool* is_stop_ptr = (bool*)arg;
|
||||
const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA;
|
||||
const char* message = (*is_stop_ptr == true) ? NULL : data->message;
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message);
|
||||
}
|
||||
void stop_callback() {
|
||||
bool is_stop = true;
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
|
||||
}
|
||||
void start_callback() {
|
||||
bool is_stop = false;
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
|
||||
}
|
||||
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry) {
|
||||
std::ostringstream os;
|
||||
os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
|
||||
if (entry->message != NULL) os << ":\"" << entry->message << "\"";
|
||||
else os << ":\"\"";
|
||||
fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HSA API tracing
|
||||
|
||||
struct hsa_api_trace_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t type;
|
||||
@@ -186,6 +284,9 @@ struct hip_api_trace_entry_t {
|
||||
void* ptr;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP API tracing
|
||||
|
||||
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}};
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1);
|
||||
@@ -221,8 +322,10 @@ void hip_api_callback(
|
||||
entry->ptr = *(data->args.hipMalloc.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
#if !HIP_VDI
|
||||
case HIP_API_ID_hipExtModuleLaunchKernel:
|
||||
case HIP_API_ID_hipHccModuleLaunchKernel:
|
||||
#endif
|
||||
const hipFunction_t f = data->args.hipModuleLaunchKernel.f;
|
||||
if (f != NULL) {
|
||||
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f));
|
||||
@@ -289,8 +392,10 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
#if !HIP_VDI
|
||||
case HIP_API_ID_hipExtModuleLaunchKernel:
|
||||
case HIP_API_ID_hipHccModuleLaunchKernel:
|
||||
#endif
|
||||
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n",
|
||||
oss.str().c_str(),
|
||||
cxx_demangle(entry->name),
|
||||
@@ -328,6 +433,32 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// KFD API tracing
|
||||
|
||||
// KFD API callback function
|
||||
#ifdef KFD_WRAPPER
|
||||
void kfd_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
kfd_begin_timestamp = timer->timestamp_fn_ns();
|
||||
} else {
|
||||
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
|
||||
std::ostringstream os;
|
||||
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
|
||||
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Input parser
|
||||
std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) {
|
||||
const std::string space_chars_set = " \t";
|
||||
@@ -401,24 +532,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); }
|
||||
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
|
||||
|
||||
// API traces switches
|
||||
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
|
||||
if (trace_domain != NULL) {
|
||||
if (strncmp(trace_domain, "hsa", 3) == 0) {
|
||||
trace_hsa_api = true;
|
||||
trace_hsa_activity = true;
|
||||
}
|
||||
if (strncmp(trace_domain, "hip", 3) == 0) {
|
||||
trace_hip = true;
|
||||
}
|
||||
if (strncmp(trace_domain, "sys", 3) == 0) {
|
||||
trace_hsa_api = true;
|
||||
trace_hip = true;
|
||||
}
|
||||
}
|
||||
|
||||
trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0);
|
||||
|
||||
// Output file
|
||||
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
if (output_prefix != NULL) {
|
||||
@@ -431,11 +544,41 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
}
|
||||
|
||||
// API traces switches
|
||||
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
|
||||
if (trace_domain != NULL) {
|
||||
// ROCTX domain
|
||||
if (std::string(trace_domain).find("roctx") != std::string::npos) {
|
||||
trace_roctx = true;
|
||||
}
|
||||
|
||||
// HSA/HIP domains enabling
|
||||
if (std::string(trace_domain).find("hsa") != std::string::npos) {
|
||||
trace_hsa_api = true;
|
||||
trace_hsa_activity = true;
|
||||
}
|
||||
if (std::string(trace_domain).find("hip") != std::string::npos) {
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
}
|
||||
if (std::string(trace_domain).find("sys") != std::string::npos) {
|
||||
trace_hsa_api = true;
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
}
|
||||
|
||||
// KFD domain enabling
|
||||
if (std::string(trace_domain).find("kfd") != std::string::npos) {
|
||||
trace_kfd = true;
|
||||
}
|
||||
}
|
||||
|
||||
// API trace vector
|
||||
std::vector<std::string> hsa_api_vec;
|
||||
std::vector<std::string> kfd_api_vec;
|
||||
|
||||
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
|
||||
|
||||
// XML input
|
||||
const char* xml_name = getenv("ROCP_INPUT");
|
||||
if (xml_name != NULL) {
|
||||
@@ -458,23 +601,28 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
break;
|
||||
}
|
||||
|
||||
if (name == "rocTX") {
|
||||
found = true;
|
||||
trace_roctx = true;
|
||||
}
|
||||
if (name == "HSA") {
|
||||
found = true;
|
||||
trace_hsa_api = true;
|
||||
hsa_api_vec = api_vec;
|
||||
}
|
||||
if (name == "KFD") {
|
||||
found = true;
|
||||
trace_kfd = true;
|
||||
kfd_api_vec = api_vec;
|
||||
}
|
||||
if (name == "GPU") {
|
||||
found = true;
|
||||
trace_hsa_activity = true;
|
||||
}
|
||||
if (name == "HIP") {
|
||||
found = true;
|
||||
trace_hip = true;
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
}
|
||||
if (name == "KFD") {
|
||||
found = true;
|
||||
trace_kfd = true;
|
||||
kfd_api_vec = api_vec;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -482,7 +630,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Enable HSA API callbacks
|
||||
// Disable HIP activity if HSA activity was set
|
||||
if (trace_hsa_activity == true) trace_hip_activity = false;
|
||||
|
||||
// Enable rpcTX callbacks
|
||||
if (trace_roctx) {
|
||||
roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer_ext_properties_t properties {
|
||||
start_callback,
|
||||
stop_callback
|
||||
};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties);
|
||||
|
||||
fprintf(stdout, " rocTX-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
|
||||
}
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
if (trace_hsa_api) {
|
||||
hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt");
|
||||
|
||||
@@ -504,6 +670,76 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// Enable HSA GPU activity
|
||||
if (trace_hsa_activity) {
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer::hsa_ops_properties_t ops_properties {
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
NULL,
|
||||
output_prefix
|
||||
};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
}
|
||||
|
||||
// Enable HIP API callbacks/activity
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
|
||||
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
|
||||
|
||||
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
|
||||
// roctracer properties
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x80000;
|
||||
properties.buffer_callback_fun = hcc_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
if (trace_hip_api) {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
}
|
||||
if (trace_hip_activity) {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
}
|
||||
}
|
||||
|
||||
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
|
||||
if (ctrl_str != NULL) {
|
||||
uint32_t ctrl_delay = 0;
|
||||
uint32_t ctrl_len = 0;
|
||||
uint32_t ctrl_rate = 0;
|
||||
int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
|
||||
if (ret != 3) {
|
||||
fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str);
|
||||
abort();
|
||||
}
|
||||
if (ctrl_len > ctrl_rate) {
|
||||
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
|
||||
abort();
|
||||
}
|
||||
control_dist_us = ctrl_rate - ctrl_len;
|
||||
control_len_us = ctrl_len;
|
||||
control_delay_us = ctrl_delay;
|
||||
|
||||
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
|
||||
|
||||
roctracer_stop();
|
||||
|
||||
pthread_t thread;
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
|
||||
}
|
||||
|
||||
#ifdef KFD_WRAPPER
|
||||
// Enable KFD API callbacks/activity
|
||||
if (trace_kfd) {
|
||||
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
|
||||
// initialize KFD tracing
|
||||
@@ -523,38 +759,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
if (trace_hsa_activity) {
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer::hsa_ops_properties_t ops_properties{
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
NULL,
|
||||
output_prefix};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
}
|
||||
|
||||
// Enable HIP API callbacks/activity
|
||||
if (trace_hip) {
|
||||
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
|
||||
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
|
||||
|
||||
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x80000;
|
||||
properties.buffer_callback_fun = hcc_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); }
|
||||
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
@@ -570,6 +775,12 @@ void tool_unload(bool destruct) {
|
||||
is_unloaded = true;
|
||||
roctracer_unload(destruct);
|
||||
|
||||
if (trace_roctx) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
|
||||
|
||||
roctx_trace_buffer.Flush();
|
||||
close_output_file(roctx_file_handle);
|
||||
}
|
||||
if (trace_hsa_api) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
|
||||
@@ -581,7 +792,7 @@ void tool_unload(bool destruct) {
|
||||
|
||||
close_output_file(hsa_async_copy_file_handle);
|
||||
}
|
||||
if (trace_hip) {
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
|
||||
Ссылка в новой задаче
Block a user