From e2fbab109d117de9aecf809bf515f0f1f38d5a15 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 27 Oct 2016 21:26:28 -0500 Subject: [PATCH] show how to use variety of HIP_PROFILE features Change-Id: I6edd66ac4c068b64e1dc3787d7f1f69ab3238469 --- .../2_Cookbook/2_Profiler/MatrixTranspose.cpp | 177 ++++++++++-------- 1 file changed, 96 insertions(+), 81 deletions(-) diff --git a/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp b/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp index 7500957dfc..b6a6b141d2 100644 --- a/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp +++ b/samples/2_Cookbook/2_Profiler/MatrixTranspose.cpp @@ -34,6 +34,8 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Y 4 #define THREADS_PER_BLOCK_Z 1 +#define ITERATIONS 10 + // Device (Kernel) function, it must be void // hipLaunchParm provides the execution configuration __global__ void matrixTranspose(hipLaunchParm lp, @@ -62,10 +64,72 @@ void matrixTransposeCPUReference( } } -int main() { - //HIP_SCOPED_MARKER(__func__, "MainFunc"); - HIP_BEGIN_MARKER(__func__, "MainFunc"); +// Use a separate function to demonstrate how to use function name as part of scoped marker: +void runGPU(float *Matrix, float *TransposeMatrix, + float* gpuMatrix, float* gpuTransposeMatrix) { + + // __func__ is a standard C++ macro which expands to the name of the function, in this case "runGPU" + HIP_SCOPED_MARKER(__func__, "MyGroup"); + + for (int i=0; i eps ) { - errors++; - } - } - if (errors!=0) { - printf("FAILED: %d errors\n",errors); - } else { - printf ("PASSED!\n"); - } - - //free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); - - //free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); + // verify the results + double eps = 1.0E-6; + for (int i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps ) { + errors++; + } } + if (errors!=0) { + printf("FAILED: %d errors\n",errors); + } else { + printf ("PASSED!\n"); + } + + //free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + //free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); HIP_END_MARKER();