Apply .clang-format to all repo source files

Change-Id: I7e79c6058f0303f9a98911e3b7dd2e8596079344
2018-03-12 11:29:03 +05:30
commit 1ba06f63c4
@@ -20,28 +20,24 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

-#include<iostream>
+#include <iostream>

 // hip header file
 #include "hip/hip_runtime.h"


-#define WIDTH     64
+#define WIDTH 64

-#define NUM       (WIDTH*WIDTH)
+#define NUM (WIDTH * WIDTH)

-#define THREADS_PER_BLOCK_X  4
-#define THREADS_PER_BLOCK_Y  4
-#define THREADS_PER_BLOCK_Z  1
+#define THREADS_PER_BLOCK_X 4
+#define THREADS_PER_BLOCK_Y 4
+#define THREADS_PER_BLOCK_Z 1

 // Device (Kernel) function, it must be void
 // hipLaunchParm provides the execution configuration
-__global__ void matrixTranspose(hipLaunchParm lp,
-                                float *out,
-                                float *in,
-                                const int width)
-{
-    __shared__ float sharedMem[WIDTH*WIDTH];
+__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in, const int width) {
+    __shared__ float sharedMem[WIDTH * WIDTH];

    int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
    int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
@@ -54,89 +50,80 @@ __global__ void matrixTranspose(hipLaunchParm lp,
 }

 // CPU implementation of matrix transpose
-void matrixTransposeCPUReference(
-    float * output,
-    float * input,
-    const unsigned int width)
-{
-    for(unsigned int j=0; j < width; j++)
-    {
-        for(unsigned int i=0; i < width; i++)
-        {
-            output[i*width + j] = input[j*width + i];
+void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
+    for (unsigned int j = 0; j < width; j++) {
+        for (unsigned int i = 0; i < width; i++) {
+            output[i * width + j] = input[j * width + i];
        }
    }
 }

 int main() {
+    float* Matrix;
+    float* TransposeMatrix;
+    float* cpuTransposeMatrix;

-  float* Matrix;
-  float* TransposeMatrix;
-  float* cpuTransposeMatrix;
+    float* gpuMatrix;
+    float* gpuTransposeMatrix;

-  float* gpuMatrix;
-  float* gpuTransposeMatrix;
+    hipDeviceProp_t devProp;
+    hipGetDeviceProperties(&devProp, 0);

-  hipDeviceProp_t devProp;
-  hipGetDeviceProperties(&devProp, 0);
+    std::cout << "Device name " << devProp.name << std::endl;

-  std::cout << "Device name " << devProp.name << std::endl;
+    int i;
+    int errors;

-  int i;
-  int errors;
+    Matrix = (float*)malloc(NUM * sizeof(float));
+    TransposeMatrix = (float*)malloc(NUM * sizeof(float));
+    cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));

-  Matrix = (float*)malloc(NUM * sizeof(float));
-  TransposeMatrix = (float*)malloc(NUM * sizeof(float));
-  cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
-
-  // initialize the input data
-  for (i = 0; i < NUM; i++) {
-    Matrix[i] = (float)i*10.0f;
-  }
-
-  // allocate the memory on the device side
-  hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
-  hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
-
-  // Memory transfer from host to device
-  hipMemcpy(gpuMatrix, Matrix, NUM*sizeof(float), hipMemcpyHostToDevice);
-
-  // Lauching kernel from host
-  hipLaunchKernel(matrixTranspose,
-                  dim3(WIDTH/THREADS_PER_BLOCK_X, WIDTH/THREADS_PER_BLOCK_Y),
-                  dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
-                  0, 0,
-                  gpuTransposeMatrix , gpuMatrix, WIDTH);
-
-  // Memory transfer from device to host
-  hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM*sizeof(float), hipMemcpyDeviceToHost);
-
-  // CPU MatrixTranspose computation
-  matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
-
-  // verify the results
-  errors = 0;
-  double eps = 1.0E-6;
-  for (i = 0; i < NUM; i++) {
-    if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps ) {
-    printf("%d cpu: %f gpu  %f\n",i,cpuTransposeMatrix[i],TransposeMatrix[i]);
-      errors++;
+    // initialize the input data
+    for (i = 0; i < NUM; i++) {
+        Matrix[i] = (float)i * 10.0f;
    }
-  }
-  if (errors!=0) {
-    printf("FAILED: %d errors\n",errors);
-  } else {
-    printf ("PASSED!\n");
-  }

-  //free the resources on device side
-  hipFree(gpuMatrix);
-  hipFree(gpuTransposeMatrix);
+    // allocate the memory on the device side
+    hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
+    hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));

-  //free the resources on host side
-  free(Matrix);
-  free(TransposeMatrix);
-  free(cpuTransposeMatrix);
+    // Memory transfer from host to device
+    hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);

-  return errors;
+    // Launching kernel from host
+    hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
+                    dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
+                    gpuMatrix, WIDTH);
+
+    // Memory transfer from device to host
+    hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
+
+    // CPU MatrixTranspose computation
+    matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
+
+    // verify the results
+    errors = 0;
+    double eps = 1.0E-6;
+    for (i = 0; i < NUM; i++) {
+        if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
+            printf("%d cpu: %f gpu  %f\n", i, cpuTransposeMatrix[i], TransposeMatrix[i]);
+            errors++;
+        }
+    }
+    if (errors != 0) {
+        printf("FAILED: %d errors\n", errors);
+    } else {
+        printf("PASSED!\n");
+    }
+
+    // free the resources on device side
+    hipFree(gpuMatrix);
+    hipFree(gpuTransposeMatrix);
+
+    // free the resources on host side
+    free(Matrix);
+    free(TransposeMatrix);
+    free(cpuTransposeMatrix);
+
+    return errors;
 }