[ROCm/roctracer commit: 5151f6ce04]
This commit is contained in:
Evgeny
2018-05-25 01:26:36 -05:00
rodzic 6afc93f4bf
commit a5fa052876
9 zmienionych plików z 207 dodań i 133 usunięć
-34
Wyświetl plik
@@ -67,37 +67,3 @@ include ( ${LIB_DIR}/CMakeLists.txt )
## Set the VERSION and SOVERSION values
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
## If the library is a release, strip the target library
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
endif ()
## Build tests
#add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
## Install information
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${ROCTRACER_NAME}/lib )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION ${ROCTRACER_NAME}/include )
## Packaging directives
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCTRACER library for AMD HSA runtime API extension support" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
## Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" )
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
## RPM package specific variables
set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" )
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
include ( CPack )
+15 -2
Wyświetl plik
@@ -65,7 +65,7 @@
#include <stdint.h>
#include <hip/hip_runtime.h>
#include <hip/hip_cbstr.h>
#include <hip/hip_cbapi.h>
#define ROCTRACER_VERSION_MAJOR 1
#define ROCTRACER_VERSION_MINOR 0
@@ -158,14 +158,27 @@ int roctracer_disable_api_callback(
typedef void roctracer_pool_t;
// Activity record
#if 0
typedef hip_act_record_t roctracer_record_t;
typedef hip_dispatch_record_t roctracer_dispatch_record_t;
typedef hip_copy_record_t roctracer_memcpy_record_t;
typedef hip_barrier_record_t roctracer_barrier_record_t;
#else
typedef hip_act_record_t roctracer_record_t;
typedef hip_copy_record_t roctracer_async_record_t;
typedef roctracer_async_record_t roctracer_dispatch_record_t;
typedef roctracer_async_record_t roctracer_memcpy_record_t;
typedef roctracer_async_record_t roctracer_barrier_record_t;
#endif
// Return next record
static inline int roctracer_next_record(
const roctracer_record_t* record, // [in] record ptr
const roctracer_record_t** next) // [out] next record ptr
{
*next = (record + 1);
*next = (record->async) ?
reinterpret_cast<const roctracer_async_record_t*>(record) + 1 :
record + 1;
return ROCTRACER_STATUS_SUCCESS;
}
+1 -1
Wyświetl plik
@@ -8,4 +8,4 @@ set ( LIB_SRC
)
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HIP_INC_DIR} )
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ${HIP_INC_DIR}/../lib/libhip_hcc.so )
+109 -16
Wyświetl plik
@@ -46,6 +46,9 @@
} \
return err;
// HCC API declaration
extern "C" void HSAOp_set_activity_record(const uint64_t& record);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Internal library methods
//
@@ -99,7 +102,8 @@ class MemoryPool {
}
// Pool definition
buffer_size_ = properties.buffer_size;
buffer_size_shift_ = properties.buffer_size;
buffer_size_ = 1 << buffer_size_shift_;
const size_t pool_size = 2 * buffer_size_;
pool_begin_ = NULL;
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
@@ -109,6 +113,10 @@ class MemoryPool {
buffer_end_ = buffer_begin_ + buffer_size_;
write_ptr_ = buffer_begin_;
// Pool references
buffer_refs_ = new uint32_t[buffer_refs_count_];
memset(buffer_refs_, 0, sizeof(uint32_t) * buffer_refs_count_);
// Consuming read thread
read_callback_fun_ = properties.buffer_callback_fun;
read_callback_arg_ = properties.buffer_callback_arg;
@@ -124,8 +132,9 @@ class MemoryPool {
}
template <typename Record>
void* Write(const Record& record) {
Record* getRecord() {
std::lock_guard<mutex_t> lock(write_mutex_);
char* next = write_ptr_ + sizeof(Record);
if (next > buffer_end_) {
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
@@ -136,9 +145,15 @@ class MemoryPool {
next = write_ptr_ + sizeof(Record);
}
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
*ptr = record;
write_ptr_ = next;
return reinterpret_cast<void*>(ptr);
*ptr = {};
return ptr;
}
template <typename Record>
void Write(const Record& record) {
*getRecord<Record>() = record;
}
void Flush() {
@@ -149,6 +164,9 @@ class MemoryPool {
}
}
void incrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] += 1; }
void decrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] -= 1; }
private:
struct consumer_arg_t {
MemoryPool* obj;
@@ -176,6 +194,10 @@ class MemoryPool {
while (arg->valid == false) {
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
}
const uint32_t buffer_index = obj->calc_buffer_index(arg->begin);
while(obj->buffer_refs_[buffer_index] != 0) PTHREAD_CALL(pthread_yield());
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
reset_reader(arg);
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
@@ -192,11 +214,14 @@ class MemoryPool {
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
}
uint32_t calc_buffer_index(const void* ptr) const { return ((uintptr_t)ptr - (uintptr_t)pool_begin_) >> buffer_size_shift_; }
// pool allocator
roctracer_allocator_t alloc_fun_;
void* alloc_arg_;
// Pool definition
size_t buffer_size_shift_;
size_t buffer_size_;
char* pool_begin_;
char* pool_end_;
@@ -205,6 +230,10 @@ class MemoryPool {
char* write_ptr_;
mutex_t write_mutex_;
// Pool references
uint32_t* buffer_refs_;
static const uint32_t buffer_refs_count_ = 2;
// Consuming read thread
roctracer_buffer_callback_t read_callback_fun_;
void* read_callback_arg_;
@@ -246,10 +275,9 @@ DESTRUCTOR_API void destructor() {
util::Logger::Destroy();
}
// Activity callback to generate an activity record
void ActivityCallback(
roctracer_record_t* record,
uint32_t activity_kind,
roctracer_record_t** record,
const void* callback_data,
void* arg)
{
@@ -259,17 +287,82 @@ void ActivityCallback(
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL");
if (data->phase == ROCTRACER_API_PHASE_ENTER) {
*record = {};
record->name = data->name;
record->activity_kind = activity_kind;
record->begin_ns = timer.timestamp_ns();
*record = pool->getRecord<roctracer_record_t>();
(*record)->activity_kind = activity_kind;
(*record)->begin_ns = timer.timestamp_ns();
// Correlation ID generating
const auto correlation_id = GlobalCounter::Increment();
record->correlation_id = correlation_id;
const_cast<hip_cb_data_t*>(data)->correlation_id = correlation_id;
uint64_t correlation_id = data->correlation_id;
if (correlation_id == 0) {
correlation_id = GlobalCounter::Increment();
const_cast<hip_cb_data_t*>(data)->correlation_id = correlation_id;
}
(*record)->correlation_id = correlation_id;
// Passing record to HCC
HSAOp_set_activity_record(correlation_id);
} else {
record->end_ns = timer.timestamp_ns();
pool->Write<roctracer_record_t>(*record);
(*record)->end_ns = timer.timestamp_ns();
// Clearing record in HCC
HSAOp_set_activity_record(0);
}
}
// HCC activity record type
struct hcc_record_t {
uint32_t op_id; // operation id, dispatch/copy/barrier
uint32_t command_id; // command kind
uint32_t async; // aysnc record, 0/1
uint64_t correlation_id; // activity correlation ID
uint64_t begin_ns; // host begin timestamp, nano-seconds
uint64_t end_ns; // host end timestamp, nano-seconds
int device_id;
uint64_t stream_id;
size_t bytes;
};
void ActivityAsyncCallback(
uint32_t op_id,
void* record,
void* arg)
{
if (op_id == 0) {
// HIP record Sync
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
*reinterpret_cast<uint64_t*>(arg) = record_ptr->correlation_id;
} else {
if (sizeof(hcc_record_t) != sizeof(roctracer_memcpy_record_t)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "record types missmatch");
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
switch (op_id) {
// Dispatch record
case 1: {
roctracer_dispatch_record_t* record_ptr = pool->getRecord<roctracer_dispatch_record_t>();
*record_ptr = *reinterpret_cast<roctracer_dispatch_record_t*>(record);
break;
}
// Memcpy record
case 2: {
roctracer_memcpy_record_t* record_ptr = pool->getRecord<roctracer_memcpy_record_t>();
*record_ptr = *reinterpret_cast<roctracer_memcpy_record_t*>(record);
break;
}
// Barrier record
case 3: {
roctracer_barrier_record_t* record_ptr = pool->getRecord<roctracer_barrier_record_t>();
*record_ptr = *reinterpret_cast<roctracer_barrier_record_t*>(record);
break;
}
// Unknown ID
default:
EXC_ABORT(ROCTRACER_STATUS_ERROR, "Unknown op ID");
}
#if 0
std::cout << "ActivityAsyncCallback " << record_ptr->name
<< " id(" << op_id << "." << record_ptr->activity_kind << ")"
<< " record(" << record << ")"
<< " correlation_id(" << record_ptr->correlation_id << ")"
<< " time-ns(" << start << ":" << end << ")"
<< " arg(" << arg << ")"
<< std::endl << std::flush;
#endif
}
}
@@ -380,7 +473,7 @@ PUBLIC_API int roctracer_enable_api_activity(
if (pool == NULL) pool = roctracer_default_pool();
switch (domain) {
case ROCTRACER_API_DOMAIN_HIP: {
const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, pool);
const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, roctracer::ActivityAsyncCallback, pool);
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")");
break;
}
@@ -1,8 +1,6 @@
#ifndef SRC_UTIL_EXCEPTION_H_
#define SRC_UTIL_EXCEPTION_H_
#include <hsa_ven_amd_aqlprofile.h>
#include <exception>
#include <sstream>
#include <string>
@@ -1,28 +1,32 @@
ROOT_PATH=../..
LIB_PATH=$(ROOT_PATH)/b
LIB_NAME=roctracer64
ROC_LIBS=-L$(LIB_PATH) -l$(LIB_NAME)
HIP_PATH=/home/evgeny/work/roc-1.8.x/hip
HCC_PATH=/home/evgeny/work/roc-1.8.x/hcc/b
export HCC_HOME=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc
HIP_PATH=/home/evgeny/git/compute/external/hip/hip
HIPCC=$(HIP_PATH)/bin/hipcc
HCC_LIBS=-L$(HCC_PATH)/lib -lmcwamp_hsa
SOURCES = MatrixTranspose.cpp
OBJECTS = $(SOURCES:.cpp=.o)
EXECUTABLE=./MatrixTranspose
export LD_LIBRARY_PATH=$(LIB_PATH)
export LD_LIBRARY_PATH=$(LIB_PATH):$(HIP_PATH)/lib:$(HCC_PATH)/lib
export HCC_HOME=$(HCC_PATH)
.PHONY: test
all: $(EXECUTABLE) test
all: clean $(EXECUTABLE) test
CXXFLAGS =-g -DCOMPILE_HIP_ATP_MARKER=1 -I$(ROOT_PATH)
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc
CXX=$(HIPCC)
$(EXECUTABLE): $(OBJECTS)
$(HIPCC) $(OBJECTS) -o $@ -L/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc/lib -lmcwamp_hsa -L$(LIB_PATH) -l$(LIB_NAME)
$(HIPCC) $(OBJECTS) -o $@ $(HCC_LIBS) $(ROC_LIBS)
test: $(EXECUTABLE)
Plik binarny nie jest wyświetlany.
@@ -21,13 +21,11 @@ THE SOFTWARE.
*/
#include <iostream>
#include <string>
// hip header file
#include "hip/hip_runtime.h"
#include "hip/hip_cbstr.h"
#include "inc/roctracer.h"
#include <hip/hip_runtime.h>
#define ITERATIONS 1
#define WIDTH 1024
@@ -55,12 +53,11 @@ void matrixTransposeCPUReference(float* output, float* input, const unsigned int
}
}
int iterations = ITERATIONS;
void init_tracing();
void finish_tracing();
int main() {
init_tracing();
float* Matrix;
float* TransposeMatrix;
float* cpuTransposeMatrix;
@@ -76,68 +73,70 @@ int main() {
int i;
int errors;
int iterations = 10;
begin:
init_tracing();
while (iterations-- > 0) {
Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
// Lauching kernel from host
hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
// Lauching kernel from host
hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
}
}
if (errors != 0) {
printf("FAILED: %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
}
if (errors != 0) {
printf("FAILED: %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
if ((errors == 0) && (--iterations != 0)) goto begin;
finish_tracing();
return errors;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP Callbacks/Activity tracing
//
#if 1
#include <inc/roctracer.h>
// Macro to check ROC-tracer calls status
#define ROCTRACER_CALL(call) \
@@ -165,7 +164,7 @@ extern "C" void hip_api_callback(
{
(void)arg;
const hip_cb_data_t* data = reinterpret_cast<const hip_cb_data_t*>(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%u) %s> ",
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
data->name,
cid,
data->correlation_id,
@@ -182,14 +181,14 @@ extern "C" void hip_api_callback(
case HIP_API_ID_hipMalloc:
fprintf(stdout, "ptr(%p) size(0x%x)",
data->args.hipMalloc.ptr,
(uint32_t)(data->args.hipMalloc.sizeBytes));
(uint32_t)(data->args.hipMalloc.size));
break;
case HIP_API_ID_hipFree:
fprintf(stdout, "ptr(%p)",
data->args.hipFree.ptr);
break;
case HIP_API_ID_hipModuleLaunchKernel:
fprintf(stdout, "kernel(%s) straem(%p)",
fprintf(stdout, "kernel(\"%s\") straem(%p)",
data->args.hipModuleLaunchKernel.f->_name.c_str(),
data->args.hipModuleLaunchKernel.stream);
break;
@@ -199,8 +198,7 @@ extern "C" void hip_api_callback(
data->args.hipLaunchKernel.stream);
break;
case HIP_API_ID_hipKernel:
fprintf(stdout, "kernel(\"%s\") start(%lu) end(%lu)",
data->args.hipKernel.name,
fprintf(stdout, "start(%lu) end(%lu)",
data->args.hipKernel.start,
data->args.hipKernel.end);
break;
@@ -228,12 +226,15 @@ void activity_callback(const char* begin, const char* end, void* arg) {
ROCTRACER_CALL(roctracer_next_record(record, &next));
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
while (reinterpret_cast<const char*>(next) <= end) {
fprintf(stdout, "\t%s id(%u)\tcorrelation_id(%lu): begin_ns(%lu) end_ns(%lu)\n",
record->name,
fprintf(stdout, "\tid(%u.%u.%u)\tcorrelation_id(%lu) host_ns(%lu:%lu)\n",
record->async,
record->op_id,
record->activity_kind,
record->correlation_id,
record->begin_ns,
record->end_ns); fflush(stdout);
record->end_ns
);
fflush(stdout);
record = next;
ROCTRACER_CALL(roctracer_next_record(record, &next));
}
@@ -245,25 +246,24 @@ void init_tracing() {
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel, hip_api_callback, NULL));
// Enable HIP activity tracing
roctracer_properties_t properties{};
properties.buffer_size = 0x100;
properties.buffer_size = 8;
properties.buffer_callback_fun = activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel));
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel));
}
void finish_tracing() {
ROCTRACER_CALL(roctracer_close_pool());
}
#else
void init_tracing() {}
void finish_tracing() {}
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Plik binarny nie jest wyświetlany.