@@ -67,37 +67,3 @@ include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
|
||||
endif ()
|
||||
|
||||
## Build tests
|
||||
#add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
|
||||
## Install information
|
||||
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${ROCTRACER_NAME}/lib )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION ${ROCTRACER_NAME}/include )
|
||||
|
||||
## Packaging directives
|
||||
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" )
|
||||
set ( CPACK_PACKAGE_VENDOR "AMD" )
|
||||
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
|
||||
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCTRACER library for AMD HSA runtime API extension support" )
|
||||
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
|
||||
|
||||
## Debian package specific variables
|
||||
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" )
|
||||
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" )
|
||||
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
|
||||
|
||||
## RPM package specific variables
|
||||
set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" )
|
||||
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
|
||||
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
|
||||
|
||||
include ( CPack )
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_cbstr.h>
|
||||
#include <hip/hip_cbapi.h>
|
||||
|
||||
#define ROCTRACER_VERSION_MAJOR 1
|
||||
#define ROCTRACER_VERSION_MINOR 0
|
||||
@@ -158,14 +158,27 @@ int roctracer_disable_api_callback(
|
||||
typedef void roctracer_pool_t;
|
||||
|
||||
// Activity record
|
||||
#if 0
|
||||
typedef hip_act_record_t roctracer_record_t;
|
||||
typedef hip_dispatch_record_t roctracer_dispatch_record_t;
|
||||
typedef hip_copy_record_t roctracer_memcpy_record_t;
|
||||
typedef hip_barrier_record_t roctracer_barrier_record_t;
|
||||
#else
|
||||
typedef hip_act_record_t roctracer_record_t;
|
||||
typedef hip_copy_record_t roctracer_async_record_t;
|
||||
typedef roctracer_async_record_t roctracer_dispatch_record_t;
|
||||
typedef roctracer_async_record_t roctracer_memcpy_record_t;
|
||||
typedef roctracer_async_record_t roctracer_barrier_record_t;
|
||||
#endif
|
||||
|
||||
// Return next record
|
||||
static inline int roctracer_next_record(
|
||||
const roctracer_record_t* record, // [in] record ptr
|
||||
const roctracer_record_t** next) // [out] next record ptr
|
||||
{
|
||||
*next = (record + 1);
|
||||
*next = (record->async) ?
|
||||
reinterpret_cast<const roctracer_async_record_t*>(record) + 1 :
|
||||
record + 1;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,4 +8,4 @@ set ( LIB_SRC
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HIP_INC_DIR} )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ${HIP_INC_DIR}/../lib/libhip_hcc.so )
|
||||
|
||||
@@ -46,6 +46,9 @@
|
||||
} \
|
||||
return err;
|
||||
|
||||
// HCC API declaration
|
||||
extern "C" void HSAOp_set_activity_record(const uint64_t& record);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal library methods
|
||||
//
|
||||
@@ -99,7 +102,8 @@ class MemoryPool {
|
||||
}
|
||||
|
||||
// Pool definition
|
||||
buffer_size_ = properties.buffer_size;
|
||||
buffer_size_shift_ = properties.buffer_size;
|
||||
buffer_size_ = 1 << buffer_size_shift_;
|
||||
const size_t pool_size = 2 * buffer_size_;
|
||||
pool_begin_ = NULL;
|
||||
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
|
||||
@@ -109,6 +113,10 @@ class MemoryPool {
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
|
||||
// Pool references
|
||||
buffer_refs_ = new uint32_t[buffer_refs_count_];
|
||||
memset(buffer_refs_, 0, sizeof(uint32_t) * buffer_refs_count_);
|
||||
|
||||
// Consuming read thread
|
||||
read_callback_fun_ = properties.buffer_callback_fun;
|
||||
read_callback_arg_ = properties.buffer_callback_arg;
|
||||
@@ -124,8 +132,9 @@ class MemoryPool {
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void* Write(const Record& record) {
|
||||
Record* getRecord() {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
@@ -136,9 +145,15 @@ class MemoryPool {
|
||||
next = write_ptr_ + sizeof(Record);
|
||||
}
|
||||
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
|
||||
*ptr = record;
|
||||
write_ptr_ = next;
|
||||
return reinterpret_cast<void*>(ptr);
|
||||
|
||||
*ptr = {};
|
||||
return ptr;
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
*getRecord<Record>() = record;
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
@@ -149,6 +164,9 @@ class MemoryPool {
|
||||
}
|
||||
}
|
||||
|
||||
void incrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] += 1; }
|
||||
void decrementRef(void* ptr) { buffer_refs_[calc_buffer_index(ptr)] -= 1; }
|
||||
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
@@ -176,6 +194,10 @@ class MemoryPool {
|
||||
while (arg->valid == false) {
|
||||
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
const uint32_t buffer_index = obj->calc_buffer_index(arg->begin);
|
||||
while(obj->buffer_refs_[buffer_index] != 0) PTHREAD_CALL(pthread_yield());
|
||||
|
||||
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
|
||||
reset_reader(arg);
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
|
||||
@@ -192,11 +214,14 @@ class MemoryPool {
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
|
||||
}
|
||||
|
||||
uint32_t calc_buffer_index(const void* ptr) const { return ((uintptr_t)ptr - (uintptr_t)pool_begin_) >> buffer_size_shift_; }
|
||||
|
||||
// pool allocator
|
||||
roctracer_allocator_t alloc_fun_;
|
||||
void* alloc_arg_;
|
||||
|
||||
// Pool definition
|
||||
size_t buffer_size_shift_;
|
||||
size_t buffer_size_;
|
||||
char* pool_begin_;
|
||||
char* pool_end_;
|
||||
@@ -205,6 +230,10 @@ class MemoryPool {
|
||||
char* write_ptr_;
|
||||
mutex_t write_mutex_;
|
||||
|
||||
// Pool references
|
||||
uint32_t* buffer_refs_;
|
||||
static const uint32_t buffer_refs_count_ = 2;
|
||||
|
||||
// Consuming read thread
|
||||
roctracer_buffer_callback_t read_callback_fun_;
|
||||
void* read_callback_arg_;
|
||||
@@ -246,10 +275,9 @@ DESTRUCTOR_API void destructor() {
|
||||
util::Logger::Destroy();
|
||||
}
|
||||
|
||||
// Activity callback to generate an activity record
|
||||
void ActivityCallback(
|
||||
roctracer_record_t* record,
|
||||
uint32_t activity_kind,
|
||||
roctracer_record_t** record,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
@@ -259,17 +287,82 @@ void ActivityCallback(
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
if (pool == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback pool is NULL");
|
||||
if (data->phase == ROCTRACER_API_PHASE_ENTER) {
|
||||
*record = {};
|
||||
record->name = data->name;
|
||||
record->activity_kind = activity_kind;
|
||||
record->begin_ns = timer.timestamp_ns();
|
||||
*record = pool->getRecord<roctracer_record_t>();
|
||||
(*record)->activity_kind = activity_kind;
|
||||
(*record)->begin_ns = timer.timestamp_ns();
|
||||
// Correlation ID generating
|
||||
const auto correlation_id = GlobalCounter::Increment();
|
||||
record->correlation_id = correlation_id;
|
||||
const_cast<hip_cb_data_t*>(data)->correlation_id = correlation_id;
|
||||
uint64_t correlation_id = data->correlation_id;
|
||||
if (correlation_id == 0) {
|
||||
correlation_id = GlobalCounter::Increment();
|
||||
const_cast<hip_cb_data_t*>(data)->correlation_id = correlation_id;
|
||||
}
|
||||
(*record)->correlation_id = correlation_id;
|
||||
// Passing record to HCC
|
||||
HSAOp_set_activity_record(correlation_id);
|
||||
} else {
|
||||
record->end_ns = timer.timestamp_ns();
|
||||
pool->Write<roctracer_record_t>(*record);
|
||||
(*record)->end_ns = timer.timestamp_ns();
|
||||
// Clearing record in HCC
|
||||
HSAOp_set_activity_record(0);
|
||||
}
|
||||
}
|
||||
|
||||
// HCC activity record type
|
||||
struct hcc_record_t {
|
||||
uint32_t op_id; // operation id, dispatch/copy/barrier
|
||||
uint32_t command_id; // command kind
|
||||
uint32_t async; // aysnc record, 0/1
|
||||
uint64_t correlation_id; // activity correlation ID
|
||||
uint64_t begin_ns; // host begin timestamp, nano-seconds
|
||||
uint64_t end_ns; // host end timestamp, nano-seconds
|
||||
int device_id;
|
||||
uint64_t stream_id;
|
||||
size_t bytes;
|
||||
};
|
||||
|
||||
void ActivityAsyncCallback(
|
||||
uint32_t op_id,
|
||||
void* record,
|
||||
void* arg)
|
||||
{
|
||||
if (op_id == 0) {
|
||||
// HIP record Sync
|
||||
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
|
||||
*reinterpret_cast<uint64_t*>(arg) = record_ptr->correlation_id;
|
||||
} else {
|
||||
if (sizeof(hcc_record_t) != sizeof(roctracer_memcpy_record_t)) EXC_ABORT(ROCTRACER_STATUS_ERROR, "record types missmatch");
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
switch (op_id) {
|
||||
// Dispatch record
|
||||
case 1: {
|
||||
roctracer_dispatch_record_t* record_ptr = pool->getRecord<roctracer_dispatch_record_t>();
|
||||
*record_ptr = *reinterpret_cast<roctracer_dispatch_record_t*>(record);
|
||||
break;
|
||||
}
|
||||
// Memcpy record
|
||||
case 2: {
|
||||
roctracer_memcpy_record_t* record_ptr = pool->getRecord<roctracer_memcpy_record_t>();
|
||||
*record_ptr = *reinterpret_cast<roctracer_memcpy_record_t*>(record);
|
||||
break;
|
||||
}
|
||||
// Barrier record
|
||||
case 3: {
|
||||
roctracer_barrier_record_t* record_ptr = pool->getRecord<roctracer_barrier_record_t>();
|
||||
*record_ptr = *reinterpret_cast<roctracer_barrier_record_t*>(record);
|
||||
break;
|
||||
}
|
||||
// Unknown ID
|
||||
default:
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR, "Unknown op ID");
|
||||
}
|
||||
#if 0
|
||||
std::cout << "ActivityAsyncCallback " << record_ptr->name
|
||||
<< " id(" << op_id << "." << record_ptr->activity_kind << ")"
|
||||
<< " record(" << record << ")"
|
||||
<< " correlation_id(" << record_ptr->correlation_id << ")"
|
||||
<< " time-ns(" << start << ":" << end << ")"
|
||||
<< " arg(" << arg << ")"
|
||||
<< std::endl << std::flush;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,7 +473,7 @@ PUBLIC_API int roctracer_enable_api_activity(
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
switch (domain) {
|
||||
case ROCTRACER_API_DOMAIN_HIP: {
|
||||
const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, pool);
|
||||
const hipError_t hip_err = hipRegisterActivityCallback(activity_kind, roctracer::ActivityCallback, roctracer::ActivityAsyncCallback, pool);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")");
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#ifndef SRC_UTIL_EXCEPTION_H_
|
||||
#define SRC_UTIL_EXCEPTION_H_
|
||||
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include <exception>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
@@ -1,28 +1,32 @@
|
||||
ROOT_PATH=../..
|
||||
LIB_PATH=$(ROOT_PATH)/b
|
||||
LIB_NAME=roctracer64
|
||||
ROC_LIBS=-L$(LIB_PATH) -l$(LIB_NAME)
|
||||
|
||||
HIP_PATH=/home/evgeny/work/roc-1.8.x/hip
|
||||
HCC_PATH=/home/evgeny/work/roc-1.8.x/hcc/b
|
||||
|
||||
export HCC_HOME=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc
|
||||
HIP_PATH=/home/evgeny/git/compute/external/hip/hip
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HCC_LIBS=-L$(HCC_PATH)/lib -lmcwamp_hsa
|
||||
|
||||
SOURCES = MatrixTranspose.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
EXECUTABLE=./MatrixTranspose
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH):$(HIP_PATH)/lib:$(HCC_PATH)/lib
|
||||
export HCC_HOME=$(HCC_PATH)
|
||||
|
||||
.PHONY: test
|
||||
|
||||
|
||||
all: $(EXECUTABLE) test
|
||||
all: clean $(EXECUTABLE) test
|
||||
|
||||
CXXFLAGS =-g -DCOMPILE_HIP_ATP_MARKER=1 -I$(ROOT_PATH)
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc
|
||||
CXX=$(HIPCC)
|
||||
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
$(HIPCC) $(OBJECTS) -o $@ -L/home/evgeny/git/compute/out/ubuntu-16.04/16.04/hcc/lib -lmcwamp_hsa -L$(LIB_PATH) -l$(LIB_NAME)
|
||||
$(HIPCC) $(OBJECTS) -o $@ $(HCC_LIBS) $(ROC_LIBS)
|
||||
|
||||
|
||||
test: $(EXECUTABLE)
|
||||
|
||||
Plik binarny nie jest wyświetlany.
@@ -21,13 +21,11 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_cbstr.h"
|
||||
#include "inc/roctracer.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define ITERATIONS 1
|
||||
#define WIDTH 1024
|
||||
|
||||
|
||||
@@ -55,12 +53,11 @@ void matrixTransposeCPUReference(float* output, float* input, const unsigned int
|
||||
}
|
||||
}
|
||||
|
||||
int iterations = ITERATIONS;
|
||||
void init_tracing();
|
||||
void finish_tracing();
|
||||
|
||||
int main() {
|
||||
init_tracing();
|
||||
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
@@ -76,68 +73,70 @@ int main() {
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
int iterations = 10;
|
||||
begin:
|
||||
init_tracing();
|
||||
while (iterations-- > 0) {
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
if ((errors == 0) && (--iterations != 0)) goto begin;
|
||||
|
||||
finish_tracing();
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP Callbacks/Activity tracing
|
||||
//
|
||||
#if 1
|
||||
#include <inc/roctracer.h>
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define ROCTRACER_CALL(call) \
|
||||
@@ -165,7 +164,7 @@ extern "C" void hip_api_callback(
|
||||
{
|
||||
(void)arg;
|
||||
const hip_cb_data_t* data = reinterpret_cast<const hip_cb_data_t*>(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%u) %s> ",
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
data->name,
|
||||
cid,
|
||||
data->correlation_id,
|
||||
@@ -182,14 +181,14 @@ extern "C" void hip_api_callback(
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(stdout, "ptr(%p) size(0x%x)",
|
||||
data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.sizeBytes));
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
fprintf(stdout, "ptr(%p)",
|
||||
data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
fprintf(stdout, "kernel(%s) straem(%p)",
|
||||
fprintf(stdout, "kernel(\"%s\") straem(%p)",
|
||||
data->args.hipModuleLaunchKernel.f->_name.c_str(),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
@@ -199,8 +198,7 @@ extern "C" void hip_api_callback(
|
||||
data->args.hipLaunchKernel.stream);
|
||||
break;
|
||||
case HIP_API_ID_hipKernel:
|
||||
fprintf(stdout, "kernel(\"%s\") start(%lu) end(%lu)",
|
||||
data->args.hipKernel.name,
|
||||
fprintf(stdout, "start(%lu) end(%lu)",
|
||||
data->args.hipKernel.start,
|
||||
data->args.hipKernel.end);
|
||||
break;
|
||||
@@ -228,12 +226,15 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &next));
|
||||
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
|
||||
while (reinterpret_cast<const char*>(next) <= end) {
|
||||
fprintf(stdout, "\t%s id(%u)\tcorrelation_id(%lu): begin_ns(%lu) end_ns(%lu)\n",
|
||||
record->name,
|
||||
fprintf(stdout, "\tid(%u.%u.%u)\tcorrelation_id(%lu) host_ns(%lu:%lu)\n",
|
||||
record->async,
|
||||
record->op_id,
|
||||
record->activity_kind,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns); fflush(stdout);
|
||||
record->end_ns
|
||||
);
|
||||
fflush(stdout);
|
||||
record = next;
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &next));
|
||||
}
|
||||
@@ -245,25 +246,24 @@ void init_tracing() {
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_api_callback(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel, hip_api_callback, NULL));
|
||||
|
||||
// Enable HIP activity tracing
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x100;
|
||||
properties.buffer_size = 8;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMemcpy));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipMalloc));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipFree));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipLaunchKernel));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipModuleLaunchKernel));
|
||||
ROCTRACER_CALL(roctracer_enable_api_activity(ROCTRACER_API_DOMAIN_HIP, HIP_API_ID_hipKernel));
|
||||
}
|
||||
|
||||
void finish_tracing() {
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
}
|
||||
|
||||
#else
|
||||
void init_tracing() {}
|
||||
void finish_tracing() {}
|
||||
#endif
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Plik binarny nie jest wyświetlany.
Reference in New Issue
Block a user