From 74bf95082a0c14d477a895824bc36550112b4baa Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 25 Nov 2019 16:17:29 -0600 Subject: [PATCH] adding roctx instrumentation to MatrixTranspose test --- test/MatrixTranspose/Makefile | 2 +- test/MatrixTranspose/MatrixTranspose.cpp | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/MatrixTranspose/Makefile b/test/MatrixTranspose/Makefile index 41727de47e..63fae09943 100644 --- a/test/MatrixTranspose/Makefile +++ b/test/MatrixTranspose/Makefile @@ -1,6 +1,6 @@ ROOT_PATH = ../.. LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so HIP_PATH?= $(wildcard /opt/rocm/hip) ifeq (,$(HIP_PATH)) diff --git a/test/MatrixTranspose/MatrixTranspose.cpp b/test/MatrixTranspose/MatrixTranspose.cpp index d2ecfb8484..264cf2d93b 100644 --- a/test/MatrixTranspose/MatrixTranspose.cpp +++ b/test/MatrixTranspose/MatrixTranspose.cpp @@ -23,8 +23,10 @@ THE SOFTWARE. 
#include <iostream> // hip header file -#include "hip/hip_runtime.h" +#include <hip/hip_runtime.h> #include "roctracer_ext.h" +// roctx header file +#include <roctx.h> #define WIDTH 1024 @@ -94,15 +96,23 @@ int main() { hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); roctracer_mark("before HIP LaunchKernel"); + roctxMark("before hipLaunchKernel"); + roctxRangePush("hipLaunchKernel"); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); roctracer_mark("after HIP LaunchKernel"); + roctxMark("after hipLaunchKernel"); // Memory transfer from device to host + roctxRangePush("hipMemcpy"); + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" + // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);