diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index dffb0eb984..b519801940 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -25,6 +25,9 @@ THE SOFTWARE. // hip header file #include +// roctx header file +#include + #ifndef ITERATIONS # define ITERATIONS 100 #endif @@ -97,14 +100,23 @@ int main() { // Memory transfer from host to device hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + roctxMarkA("before hipLaunchKernel"); + roctxRangePushA("hipLaunchKernel"); + //roctxRangePushA("before hipLaunchKernel"); + // roctxRangePushA("before hipLaunchKernel"); // Lauching kernel from host hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); + //roctxRangePop(); + roctxMarkA("after hipLaunchKernel"); // Memory transfer from device to host + roctxRangePushA("hipMemcpy"); hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" + // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -143,6 +155,7 @@ int main() { #if 1 #include #include +#include // Macro to check ROC-tracer calls status #define ROCTRACER_CALL(call) \ @@ -162,6 +175,13 @@ void api_callback( void* arg) { (void)arg; + + if (domain == ACTIVITY_DOMAIN_ROCTX) { + const roctx_api_data_t* data = reinterpret_cast(callback_data); + fprintf(stdout, "ROCTX: \"%s\"\n", data->args.message); + return; + } + const hip_api_data_t* data = reinterpret_cast(callback_data); fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),