Add p2p for cookbook

Change-Id: Id2e77ab31123ef95885d665efe34bc0d4596733a (cherry picked from commit 6fbd0352713ca36e399b1ed4f17c486207a53875) [ROCm/hip-tests commit: 8829e2626c]
2016-11-09 12:06:45 +05:30
@@ -0,0 +1,36 @@
+HIP_PATH?= $(wildcard /opt/rocm/hip)
+ifeq (,$(HIP_PATH))
+	HIP_PATH=../../..
+endif
+
+HIPCC=$(HIP_PATH)/bin/hipcc
+
+TARGET=hcc
+
+SOURCES = peer2peer.cpp
+OBJECTS = $(SOURCES:.cpp=.o)
+
+EXECUTABLE=./peer2peer
+
+.PHONY: test
+
+
+all: $(EXECUTABLE) test
+
+CXXFLAGS =-g
+CXX=$(HIPCC)
+
+
+$(EXECUTABLE): $(OBJECTS)
+	$(HIPCC) $(OBJECTS) -o $@
+
+
+test: $(EXECUTABLE)
+	$(EXECUTABLE)
+
+
+clean:
+	rm -f $(EXECUTABLE)
+	rm -f $(OBJECTS)
+	rm -f $(HIP_PATH)/src/*.o
+
@@ -0,0 +1,241 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANUMTY OF ANY KIND, EXPRESS OR
+IMPLIED, INUMCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNUMESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANUMY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INUM AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INUM CONUMECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <iostream>
+#include <hip/hip_runtime.h>
+#include <assert.h>
+#define WIDTH     32
+
+#define NUM       (WIDTH*WIDTH)
+
+#define THREADS_PER_BLOCK_X  4
+#define THREADS_PER_BLOCK_Y  4
+#define THREADS_PER_BLOCK_Z  1
+
+using namespace std;
+
+#define KNRM  "\x1B[0m"
+#define KRED  "\x1B[31m"
+
+#define failed(...) \
+    printf ("%serror: ", KRED);\
+    printf (__VA_ARGS__);\
+    printf ("\n");\
+    printf ("error: TEST FAILED\n%s", KNRM );\
+    abort();
+
+#define HIPCHECK(error) \
+{\
+    hipError_t localError = error; \
+    if (localError != hipSuccess) { \
+        printf("%serror: '%s'(%d) from %s at %s:%d%s\n", \
+        KRED, hipGetErrorString(localError), localError,\
+        #error,__FILE__, __LINE__, KNRM); \
+        failed("API returned error code.");\
+    }\
+}
+
+void checkPeer2PeerSupport()
+{
+    int gpuCount;
+    int canAccessPeer;
+    int p2pCapableDeviceCount=0;
+
+    HIPCHECK(hipGetDeviceCount(&gpuCount));
+
+    if (gpuCount < 2)
+        printf("Peer2Peer application requires atleast 2 gpu devices");
+
+    for (int currentGpu=0; currentGpu<gpuCount; currentGpu++)
+    {
+        HIPCHECK(hipSetDevice(currentGpu));
+
+        for (int peerGpu=0; peerGpu<currentGpu; peerGpu++)
+        {
+            if (currentGpu!=peerGpu)
+            {
+                HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu));
+                printf ("currentGpu#%d canAccessPeer: peerGpu#%d=%d\n", currentGpu, peerGpu, canAccessPeer);
+            }
+
+            HIPCHECK(hipSetDevice(peerGpu));
+            HIPCHECK(hipDeviceReset());
+        }
+    HIPCHECK(hipSetDevice(currentGpu));
+    HIPCHECK(hipDeviceReset());
+    }
+}
+
+void enablePeer2Peer(int currentGpu, int peerGpu)
+{
+    int canAccessPeer;
+
+    // Must be on a multi-gpu system:
+    assert (currentGpu != peerGpu);
+
+    HIPCHECK(hipSetDevice(currentGpu));
+    hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu);
+
+    if(canAccessPeer==1){
+        HIPCHECK(hipDeviceEnablePeerAccess(peerGpu, 0));
+    }
+    else
+    printf("peer2peer transfer not possible between the selected gpu devices");
+}
+
+void disablePeer2Peer(int currentGpu, int peerGpu)
+{
+    int canAccessPeer;
+
+    // Must be on a multi-gpu system:
+    assert (currentGpu != peerGpu);
+
+    HIPCHECK(hipSetDevice(currentGpu));
+    hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu);
+	
+    if(canAccessPeer==1){
+        HIPCHECK(hipDeviceDisablePeerAccess(peerGpu));
+    }
+    else
+    printf("peer2peer disable not required");
+}
+
+
+__global__ void matrixTranspose_static_shared(hipLaunchParm lp,
+                                float *out,
+                                float *in,
+                                const int width)
+{
+    __shared__ float sharedMem[WIDTH*WIDTH];
+
+    int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+    int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+
+    sharedMem[y * width + x] = in[x * width + y];
+
+    __syncthreads();
+
+    out[y * width + x] = sharedMem[y * width + x];
+}
+
+__global__ void matrixTranspose_dynamic_shared(hipLaunchParm lp,
+                                float *out,
+                                float *in,
+                                const int width)
+{
+    // declare dynamic shared memory
+    HIP_DYNAMIC_SHARED(float, sharedMem)
+
+    int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+    int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
+
+    sharedMem[y * width + x] = in[x * width + y];
+
+    __syncthreads();
+
+    out[y * width + x] = sharedMem[y * width + x];
+}
+
+int main(){
+
+    checkPeer2PeerSupport();
+    
+    int gpuCount;
+    int currentGpu, peerGpu;
+
+    HIPCHECK(hipGetDeviceCount(&gpuCount));
+
+    currentGpu = 0;
+    peerGpu = (currentGpu + 1);
+
+    printf ("currentGpu=%d peerGpu=%d (Total no. of gpu = %d)\n", currentGpu, peerGpu, gpuCount);
+
+    float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray;
+
+    int width = WIDTH;
+
+    randArray = (float*)malloc(NUM * sizeof(float));
+
+    for(int i = 0; i < NUM; i++)
+    {
+        randArray[i] = (float)i*1.0f;
+    }
+
+    enablePeer2Peer(currentGpu, peerGpu);
+
+    HIPCHECK(hipSetDevice(currentGpu));
+    TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float));
+    hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float));
+    hipMalloc((void**)&data[0], NUM * sizeof(float));
+    hipMemcpy(data[0], randArray, NUM * sizeof(float), hipMemcpyHostToDevice);
+
+    hipLaunchKernel(matrixTranspose_static_shared,
+                    dim3(WIDTH/THREADS_PER_BLOCK_X, WIDTH/THREADS_PER_BLOCK_Y),
+                    dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
+                    0, 0,
+                    gpuTransposeMatrix[0], data[0], width);
+
+	HIPCHECK(hipSetDevice(peerGpu));
+	TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float));
+	hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float));
+	hipMalloc((void**)&data[1], NUM * sizeof(float));
+    hipMemcpy(data[1], gpuTransposeMatrix[0], NUM * sizeof(float), hipMemcpyDeviceToDevice);
+
+    hipLaunchKernel(matrixTranspose_dynamic_shared,
+                    dim3(WIDTH/THREADS_PER_BLOCK_X, WIDTH/THREADS_PER_BLOCK_Y),
+                    dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
+                    sizeof(float)*WIDTH*WIDTH, 0,
+                    gpuTransposeMatrix[1], data[1], width);
+
+    hipMemcpy(TransposeMatrix[1], gpuTransposeMatrix[1], NUM*sizeof(float), hipMemcpyDeviceToHost);
+
+    hipDeviceSynchronize();
+
+    disablePeer2Peer(currentGpu, peerGpu);
+
+    // verify the results
+    int errors = 0;
+    double eps = 1.0E-6;
+    for (int i = 0; i < NUM; i++) {
+        if (std::abs(randArray[i] - TransposeMatrix[1][i]) > eps ) {
+        printf("%d cpu: %f gpu peered data  %f\n",i,randArray[i],TransposeMatrix[1][i]);
+        errors++;
+        }
+    }
+    if (errors!=0) {
+        printf("FAILED: %d errors\n",errors);
+    } else {
+        printf ("Peer2Peer PASSED!\n");
+    }
+
+    free(randArray);
+    for(int i=0;i<2;i++){
+       hipFree(data[i]);
+       hipFree(gpuTransposeMatrix[i]);
+       free(TransposeMatrix[i]);
+    }
+
+    HIPCHECK(hipSetDevice(peerGpu));
+    HIPCHECK(hipDeviceReset());
+
+    HIPCHECK(hipSetDevice(currentGpu));
+    HIPCHECK(hipDeviceReset());
+
+    return 0;
+}