diff --git a/projects/roctracer/CMakeLists.txt b/projects/roctracer/CMakeLists.txt index c74ed4c781..84c0fa8cfa 100644 --- a/projects/roctracer/CMakeLists.txt +++ b/projects/roctracer/CMakeLists.txt @@ -67,37 +67,3 @@ include ( ${LIB_DIR}/CMakeLists.txt ) ## Set the VERSION and SOVERSION values set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" ) set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" ) - -## If the library is a release, strip the target library -if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) - add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) -endif () - -## Build tests -#add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test ) - -## Install information -install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${ROCTRACER_NAME}/lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION ${ROCTRACER_NAME}/include ) - -## Packaging directives -set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" ) -set ( CPACK_PACKAGE_VENDOR "AMD" ) -set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} ) -set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} ) -set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} ) -set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." ) -set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCTRACER library for AMD HSA runtime API extension support" ) -set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" ) - -## Debian package specific variables -set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" ) -set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" ) -set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" ) - -## RPM package specific variables -set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" ) -set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) -set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" ) - -include ( CPack ) diff --git a/projects/roctracer/inc/roctracer.h b/projects/roctracer/inc/roctracer.h index 8e48ef3621..f804b8b8eb 100644 --- a/projects/roctracer/inc/roctracer.h +++ b/projects/roctracer/inc/roctracer.h @@ -65,7 +65,7 @@ #include #include -#include +#include #define ROCTRACER_VERSION_MAJOR 1 #define ROCTRACER_VERSION_MINOR 0 @@ -158,14 +158,27 @@ int roctracer_disable_api_callback( typedef void roctracer_pool_t; // Activity record +#if 0 typedef hip_act_record_t roctracer_record_t; +typedef hip_dispatch_record_t roctracer_dispatch_record_t; +typedef hip_copy_record_t roctracer_memcpy_record_t; +typedef hip_barrier_record_t roctracer_barrier_record_t; +#else +typedef hip_act_record_t roctracer_record_t; +typedef hip_copy_record_t roctracer_async_record_t; +typedef roctracer_async_record_t roctracer_dispatch_record_t; +typedef roctracer_async_record_t roctracer_memcpy_record_t; +typedef roctracer_async_record_t roctracer_barrier_record_t; +#endif // Return next record static inline int roctracer_next_record( const roctracer_record_t* record, // [in] record ptr const roctracer_record_t** next) // [out] next record ptr { - *next = (record + 1); + *next = (record->async) ? + reinterpret_cast(record) + 1 : + record + 1; return ROCTRACER_STATUS_SUCCESS; } diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index b0f3da3c7c..e0c90879be 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -8,4 +8,4 @@ set ( LIB_SRC ) add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HIP_INC_DIR} ) -target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++) +target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ${HIP_INC_DIR}/../lib/libhip_hcc.so ) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 1a39f86d6c..af33702ff2 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -46,6 +46,9 @@ } \ return err; +// HCC API declaration +extern "C" void HSAOp_set_activity_record(const uint64_t& record); + /////////////////////////////////////////////////////////////////////////////////////////////////// // Internal library methods // @@ -99,7 +102,8 @@ class MemoryPool { } // Pool definition - buffer_size_ = properties.buffer_size; + buffer_size_shift_ = properties.buffer_size; + buffer_size_ = 1 << buffer_size_shift_; const size_t pool_size = 2 * buffer_size_; pool_begin_ = NULL; alloc_fun_(&pool_begin_, pool_size, alloc_arg_); @@ -109,6 +113,10 @@ class MemoryPool { buffer_end_ = buffer_begin_ + buffer_size_; write_ptr_ = buffer_begin_; + // Pool references + buffer_refs_ = new uint32_t[buffer_refs_count_]; + memset(buffer_refs_, 0, sizeof(uint32_t) * buffer_refs_count_); + // Consuming read thread read_callback_fun_ = properties.buffer_callback_fun; read_callback_arg_ = properties.buffer_callback_arg; @@ -124,8 +132,9 @@ class MemoryPool { } template - void* Write(const Record& record) { + Record* getRecord() { std::lock_guard lock(write_mutex_); + char* next = write_ptr_ + sizeof(Record); if (next > buffer_end_) { if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); @@ -136,9 +145,15 @@ class MemoryPool { next = write_ptr_ + sizeof(Record); } Record* ptr = reinterpret_cast(write_ptr_); - *ptr = record; write_ptr_ = next; - return reinterpret_cast(ptr); + + *ptr = {}; + return ptr; + } + + template + void Write(const Record& record) { + *getRecord() = record; } void Flush() { @@ -149,6 +164,9 @@ class MemoryPool { } } + void incrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] += 1; } + void decrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] -= 1; } + private: struct consumer_arg_t { MemoryPool* obj; @@ -176,6 +194,10 @@ class MemoryPool { while (arg->valid == false) { PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_))); } + + const uint32_t buffer_index = obj->calc_buffer_index(arg->begin); + while(obj->buffer_refs_[buffer_index] != 0) PTHREAD_CALL(pthread_yield()); + obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_); reset_reader(arg); PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_))); @@ -192,11 +214,14 @@ class MemoryPool { PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_)); } + uint32_t calc_buffer_index(const void* ptr) const { return ((uintptr_t)ptr - (uintptr_t)pool_begin_) >> buffer_size_shift_; } + // pool allocator roctracer_allocator_t alloc_fun_; void* alloc_arg_; // Pool definition + size_t buffer_size_shift_; size_t buffer_size_; char* pool_begin_; char* pool_end_; @@ -205,6 +230,10 @@ class MemoryPool { char* write_ptr_; mutex_t write_mutex_; + // Pool references + uint32_t* buffer_refs_; + static const uint32_t buffer_refs_count_ = 2; + // Consuming read thread roctracer_buffer_callback_t read_callback_fun_; void* read_callback_arg_; @@ -246,10 +275,9 @@ DESTRUCTOR_API void destructor() { util::Logger::Destroy(); } -// Activity callback to generate an activity record void ActivityCallback( - roctracer_record_t* record, uint32_t activity_kind, + roctracer_record_t** record, const void* callback_data, void* arg) { @@ -259,17 +287,82 @@ void ActivityCallback( MemoryPool* pool = reinterpret_cast(arg); if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL"); if (data->phase == ROCTRACER_API_PHASE_ENTER) { - *record = {}; - record->name = data->name; - record->activity_kind = activity_kind; - record->begin_ns = timer.timestamp_ns(); + *record = pool->getRecord(); + (*record)->activity_kind = activity_kind; + (*record)->begin_ns = timer.timestamp_ns(); // Correlation ID generating - const auto correlation_id = GlobalCounter::Increment(); - record->correlation_id = correlation_id; - const_cast(data)->correlation_id = correlation_id; + uint64_t correlation_id = data->correlation_id; + if (correlation_id == 0) { + correlation_id = GlobalCounter::Increment(); + const_cast(data)->correlation_id = correlation_id; + } + (*record)->correlation_id = correlation_id; + // Passing record to HCC + HSAOp_set_activity_record(correlation_id); } else { - record->end_ns = timer.timestamp_ns(); - pool->Write(*record); + (*record)->end_ns = timer.timestamp_ns(); + // Clearing record in HCC + HSAOp_set_activity_record(0); + } +} + +// HCC activity record type +struct hcc_record_t { + uint32_t op_id; // operation id, dispatch/copy/barrier + uint32_t command_id; // command kind + uint32_t async; // aysnc record, 0/1 + uint64_t correlation_id; // activity correlation ID + uint64_t begin_ns; // host begin timestamp, nano-seconds + uint64_t end_ns; // host end timestamp, nano-seconds + int device_id; + uint64_t stream_id; + size_t bytes; +}; + +void ActivityAsyncCallback( + uint32_t op_id, + void* record, + void* arg) +{ + if (op_id == 0) { + // HIP record Sync + roctracer_record_t* record_ptr = reinterpret_cast(record); + *reinterpret_cast(arg) = record_ptr->correlation_id; + } else { + if (sizeof(hcc_record_t) != sizeof(roctracer_memcpy_record_t)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "record types missmatch"); + MemoryPool* pool = reinterpret_cast(arg); + switch (op_id) { + // Dispatch record + case 1: { + roctracer_dispatch_record_t* record_ptr = pool->getRecord(); + *record_ptr = *reinterpret_cast(record); + break; + } + // Memcpy record + case 2: { + roctracer_memcpy_record_t* record_ptr = pool->getRecord(); + *record_ptr = *reinterpret_cast(record); + break; + } + // Barrier record + case 3: { + roctracer_barrier_record_t* record_ptr = pool->getRecord(); + *record_ptr = *reinterpret_cast(record); + break; + } + // Unknown ID + default: + EXC_ABORT(ROCTRACER_STATUS_ERROR, "Unknown op ID"); + } +#if 0 + std::cout << "ActivityAsyncCallback " << record_ptr->name + << " id(" << op_id << "." << record_ptr->activity_kind << ")" + << " record(" << record << ")" + << " correlation_id(" << record_ptr->correlation_id << ")" + << " time-ns(" << start << ":" << end << ")" + << " arg(" << arg << ")" + << std::endl << std::flush; +#endif } } @@ -380,7 +473,7 @@ PUBLIC_API int roctracer_enable_api_activity( if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { case ROCTRACER_API_DOMAIN_HIP: { - const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, pool); + const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, roctracer::ActivityAsyncCallback, pool); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")"); break; } diff --git a/projects/roctracer/src/util/exception.h b/projects/roctracer/src/util/exception.h index 71fb87a28b..96c91c0804 100644 --- a/projects/roctracer/src/util/exception.h +++ b/projects/roctracer/src/util/exception.h @@ -1,8 +1,6 @@ #ifndef SRC_UTIL_EXCEPTION_H_ #define SRC_UTIL_EXCEPTION_H_ -#include - #include #include #include diff --git a/projects/roctracer/test/MatrixTranspose/Makefile b/projects/roctracer/test/MatrixTranspose/Makefile index 0d21e8ebd0..861a0e21c9 100644 --- a/projects/roctracer/test/MatrixTranspose/Makefile +++ b/projects/roctracer/test/MatrixTranspose/Makefile @@ -1,28 +1,32 @@ ROOT_PATH=../.. LIB_PATH=$(ROOT_PATH)/b LIB_NAME=roctracer64 +ROC_LIBS=-L$(LIB_PATH) -l$(LIB_NAME) + +HIP_PATH=/home/evgeny/work/roc-1.8.x/hip +HCC_PATH=/home/evgeny/work/roc-1.8.x/hcc/b -export HCC_HOME=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc -HIP_PATH=/home/evgeny/git/compute/external/hip/hip HIPCC=$(HIP_PATH)/bin/hipcc +HCC_LIBS=-L$(HCC_PATH)/lib -lmcwamp_hsa SOURCES = MatrixTranspose.cpp OBJECTS = $(SOURCES:.cpp=.o) EXECUTABLE=./MatrixTranspose -export LD_LIBRARY_PATH=$(LIB_PATH) +export LD_LIBRARY_PATH=$(LIB_PATH):$(HIP_PATH)/lib:$(HCC_PATH)/lib +export HCC_HOME=$(HCC_PATH) .PHONY: test -all: $(EXECUTABLE) test +all: clean $(EXECUTABLE) test -CXXFLAGS =-g -DCOMPILE_HIP_ATP_MARKER=1 -I$(ROOT_PATH) +CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc CXX=$(HIPCC) $(EXECUTABLE): $(OBJECTS) - $(HIPCC) $(OBJECTS) -o $@ -L/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc/lib -lmcwamp_hsa -L$(LIB_PATH) -l$(LIB_NAME) + $(HIPCC) $(OBJECTS) -o $@ $(HCC_LIBS) $(ROC_LIBS) test: $(EXECUTABLE) diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose b/projects/roctracer/test/MatrixTranspose/MatrixTranspose deleted file mode 100755 index 769110b31e..0000000000 Binary files a/projects/roctracer/test/MatrixTranspose/MatrixTranspose and /dev/null differ diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp index df57c8b900..90ffc84251 100644 --- a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp +++ b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.cpp @@ -21,13 +21,11 @@ THE SOFTWARE. */ #include -#include // hip header file -#include "hip/hip_runtime.h" -#include "hip/hip_cbstr.h" -#include "inc/roctracer.h" +#include +#define ITERATIONS 1 #define WIDTH 1024 @@ -55,12 +53,11 @@ void matrixTransposeCPUReference(float* output, float* input, const unsigned int } } +int iterations = ITERATIONS; void init_tracing(); void finish_tracing(); int main() { - init_tracing(); - float* Matrix; float* TransposeMatrix; float* cpuTransposeMatrix; @@ -76,68 +73,70 @@ int main() { int i; int errors; - int iterations = 10; - begin: + init_tracing(); + while (iterations-- > 0) { - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - - // Lauching kernel from host - hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH); - - // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; } + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // Memory transfer from host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + + // Lauching kernel from host + hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, + gpuMatrix, WIDTH); + + // Memory transfer from device to host + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + errors = 0; + double eps = 1.0E-6; + for (i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - if ((errors == 0) && (--iterations != 0)) goto begin; - finish_tracing(); + return errors; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // HIP Callbacks/Activity tracing // +#if 1 +#include // Macro to check ROC-tracer calls status #define ROCTRACER_CALL(call) \ @@ -165,7 +164,7 @@ extern "C" void hip_api_callback( { (void)arg; const hip_cb_data_t* data = reinterpret_cast(callback_data); - fprintf(stdout, "<%s id(%u)\tcorrelation_id(%u) %s> ", + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", data->name, cid, data->correlation_id, @@ -182,14 +181,14 @@ extern "C" void hip_api_callback( case HIP_API_ID_hipMalloc: fprintf(stdout, "ptr(%p) size(0x%x)", data->args.hipMalloc.ptr, - (uint32_t)(data->args.hipMalloc.sizeBytes)); + (uint32_t)(data->args.hipMalloc.size)); break; case HIP_API_ID_hipFree: fprintf(stdout, "ptr(%p)", data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: - fprintf(stdout, "kernel(%s) straem(%p)", + fprintf(stdout, "kernel(\"%s\") straem(%p)", data->args.hipModuleLaunchKernel.f->_name.c_str(), data->args.hipModuleLaunchKernel.stream); break; @@ -199,8 +198,7 @@ extern "C" void hip_api_callback( data->args.hipLaunchKernel.stream); break; case HIP_API_ID_hipKernel: - fprintf(stdout, "kernel(\"%s\") start(%lu) end(%lu)", - data->args.hipKernel.name, + fprintf(stdout, "start(%lu) end(%lu)", data->args.hipKernel.start, data->args.hipKernel.end); break; @@ -228,12 +226,15 @@ void activity_callback(const char* begin, const char* end, void* arg) { ROCTRACER_CALL(roctracer_next_record(record, &next)); fprintf(stdout, "\tActivity records:\n"); fflush(stdout); while (reinterpret_cast(next) <= end) { - fprintf(stdout, "\t%s id(%u)\tcorrelation_id(%lu): begin_ns(%lu) end_ns(%lu)\n", - record->name, + fprintf(stdout, "\tid(%u.%u.%u)\tcorrelation_id(%lu) host_ns(%lu:%lu)\n", + record->async, + record->op_id, record->activity_kind, record->correlation_id, record->begin_ns, - record->end_ns); fflush(stdout); + record->end_ns + ); + fflush(stdout); record = next; ROCTRACER_CALL(roctracer_next_record(record, &next)); } @@ -245,25 +246,24 @@ void init_tracing() { ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree, hip_api_callback, NULL)); - ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel, hip_api_callback, NULL)); - ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel, hip_api_callback, NULL)); // Enable HIP activity tracing roctracer_properties_t properties{}; - properties.buffer_size = 0x100; + properties.buffer_size = 8; properties.buffer_callback_fun = activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy)); ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc)); ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree)); - ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel)); ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel)); - ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel)); } void finish_tracing() { ROCTRACER_CALL(roctracer_close_pool()); } - +#else +void init_tracing() {} +void finish_tracing() {} +#endif //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.o b/projects/roctracer/test/MatrixTranspose/MatrixTranspose.o deleted file mode 100644 index fd58d278a3..0000000000 Binary files a/projects/roctracer/test/MatrixTranspose/MatrixTranspose.o and /dev/null differ