From 3cb87cdc4602773af57e1e003e7f27cf8a8ccc7b Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary <51944368+cjatin@users.noreply.github.com> Date: Wed, 3 Jul 2019 14:21:19 +0530 Subject: [PATCH] Adding new unroll example (#1187) --- hipamd/samples/2_Cookbook/9_unroll/Readme.md | 12 +- hipamd/samples/2_Cookbook/9_unroll/unroll.cpp | 120 +++++++++--------- 2 files changed, 64 insertions(+), 68 deletions(-) diff --git a/hipamd/samples/2_Cookbook/9_unroll/Readme.md b/hipamd/samples/2_Cookbook/9_unroll/Readme.md index 9b464f1472..0febeaa03c 100644 --- a/hipamd/samples/2_Cookbook/9_unroll/Readme.md +++ b/hipamd/samples/2_Cookbook/9_unroll/Readme.md @@ -16,17 +16,15 @@ Programmers familiar with CUDA, OpenCL will be able to quickly learn and start c ## Simple Matrix Transpose -For this tutorial we will be using MatrixTranspose with shfl operation i.e., our 4_shfl tutorial since it is the only examples where we used loops inside the kernel. +For this tutorial we will be using an example which sums up the row of a 2D matrix and writes it in a 1D array. -In this tutorial, we'll use `#pragma unroll`. In the same sourcecode, we used for MatrixTranspose. We'll add it just before the for loop as following: +In this tutorial, we'll use `#pragma unroll`. In the same sourcecode, we used for gpuMatrixRowSum. We'll add it just before the for loop as following: ``` #pragma unroll - for(int i=0;i eps) { - printf("%d cpu: %f gpu %f\n", i, cpuTransposeMatrix[i], TransposeMatrix[i]); + int errors = 0; + for (int i = 0; i < LENGTH; i++) { + if (sumMatrix[i] != cpuSumMatrix[i]) { + printf("%d - cpu: %d gpu: %d\n", i, sumMatrix[i], cpuSumMatrix[i]); errors++; } } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); + + if (errors == 0) { + printf("PASSED\n"); } else { - printf("PASSED!\n"); + printf("FAILED with %d errors\n", errors); } - // free the resources on device side + // GPU Free hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + hipFree(gpuSumMatrix); - // free the resources on host side + // CPU Free free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); + free(sumMatrix); + free(cpuSumMatrix); return errors; }