From 419d46f9011e472fa8b908030a426695c95fbc1d Mon Sep 17 00:00:00 2001 From: streamhsa Date: Sat, 16 Apr 2016 21:12:09 +0800 Subject: [PATCH 1/4] Add __ldg [ROCm/clr commit: 0426564a7b7353cef6d52c52341e4d4184d4db56] --- .../hipamd/include/hcc_detail/hip_runtime.h | 7 + projects/clr/hipamd/tests/src/CMakeLists.txt | 2 + projects/clr/hipamd/tests/src/hip_ldg.cpp | 150 ++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 projects/clr/hipamd/tests/src/hip_ldg.cpp diff --git a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h index aa420e992d..9f88017215 100644 --- a/projects/clr/hipamd/include/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hcc_detail/hip_runtime.h @@ -401,6 +401,13 @@ __device__ int __all( int input); __device__ int __any( int input); __device__ unsigned long long int __ballot( int input); +// __ldg function +template +__device__ __forceinline__ T __ldg( const T * addr) +{ + return *addr; +} + // warp shuffle functions #ifdef __cplusplus diff --git a/projects/clr/hipamd/tests/src/CMakeLists.txt b/projects/clr/hipamd/tests/src/CMakeLists.txt index 68099ae083..5b1829286d 100644 --- a/projects/clr/hipamd/tests/src/CMakeLists.txt +++ b/projects/clr/hipamd/tests/src/CMakeLists.txt @@ -145,6 +145,7 @@ make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) +make_hip_executable (hip_ldg hip_ldg.cpp) make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) make_hip_executable (hipEnvVar hipEnvVar.cpp) make_hip_executable (hipEnvVarDriver hipEnvVarDriver.cpp) @@ -184,6 +185,7 @@ make_test(hip_popc " " ) make_test(hip_brev " " ) make_test(hip_clz " " ) make_test(hip_ffs " " ) +make_test(hip_ldg " " ) make_test(hipEventRecord --iterations 10) make_test(hipMemset " " ) make_test(hipMemset --N 10 --memsetval 0x42 ) # small copy, just 10 bytes. diff --git a/projects/clr/hipamd/tests/src/hip_ldg.cpp b/projects/clr/hipamd/tests/src/hip_ldg.cpp new file mode 100644 index 0000000000..2f281c5991 --- /dev/null +++ b/projects/clr/hipamd/tests/src/hip_ldg.cpp @@ -0,0 +1,150 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#include "hip_runtime.h" + + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) + + +#define WIDTH 1024 +#define HEIGHT 1024 + +#define NUM (WIDTH*HEIGHT) + +#define THREADS_PER_BLOCK_X 16 +#define THREADS_PER_BLOCK_Y 16 +#define THREADS_PER_BLOCK_Z 1 + +__global__ void +vectoradd_float(hipLaunchParm lp, + float* a, const float* bm, const float* cm, int width, int height) + + { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + int i = y * width + x; + if ( i < (width * height)) { + a[i] = __ldg(&bm[i]) + __ldg(&cm[i]); + } + + + + } + +#if 0 +__kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { + + + int x = blockDimX * blockIdx.x + threadIdx.x; + int y = blockDimY * blockIdy.y + threadIdx.y; + + int i = y * width + x; + if ( i < (width * height)) { + a[i] = b[i] + c[i]; + } +} +#endif + +using namespace std; + +int main() { + + float* hostA; + float* hostB; + float* hostC; + + float* deviceA; + float* deviceB; + float* deviceC; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + cout << " System minor " << devProp.minor << endl; + cout << " System major " << devProp.major << endl; + cout << " agent prop name " << devProp.name << endl; + + + + cout << "__ldg " << endl ; + + + int i; + int errors; + + hostA = (float*)malloc(NUM * sizeof(float)); + hostB = (float*)malloc(NUM * sizeof(float)); + hostC = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + hostB[i] = (float)i; + hostC[i] = (float)i*100.0f; + } + + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(float))); + HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(float))); + HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(float))); + + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(float), hipMemcpyHostToDevice)); + + + hipLaunchKernel(vectoradd_float, + dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), + 0, 0, + deviceA ,deviceB ,deviceC ,WIDTH ,HEIGHT); + + + HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(float), hipMemcpyDeviceToHost)); + + // verify the results + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostA[i] != (hostB[i] + hostC[i])) { + errors++; + } + } + if (errors!=0) { + printf("FAILED: %d errors\n",errors); + } else { + printf ("PASSED!\n"); + } + + HIP_ASSERT(hipFree(deviceA)); + HIP_ASSERT(hipFree(deviceB)); + HIP_ASSERT(hipFree(deviceC)); + + free(hostA); + free(hostB); + free(hostC); + + //hipResetDefaultAccelerator(); + + return errors; +} From f0a9d95d552099026012dd605b065fee5271bc53 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 16 Apr 2016 17:10:13 -0500 Subject: [PATCH 2/4] Corrected Memcpydefault [ROCm/clr commit: dc61929a3d93a8f3d9ab9a12626c33c129ca0db1] --- .../clr/hipamd/include/hcc_detail/hip_hcc.h | 2 +- projects/clr/hipamd/src/hip_hcc.cpp | 37 +++++++++++-------- projects/clr/hipamd/src/hip_memory.cpp | 4 +- projects/clr/hipamd/src/hip_peer.cpp | 10 ----- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/projects/clr/hipamd/include/hcc_detail/hip_hcc.h b/projects/clr/hipamd/include/hcc_detail/hip_hcc.h index deb9fd0b04..c0ddf1595a 100644 --- a/projects/clr/hipamd/include/hcc_detail/hip_hcc.h +++ b/projects/clr/hipamd/include/hcc_detail/hip_hcc.h @@ -450,7 +450,7 @@ private: void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal); // The unsigned return is hipMemcpyKind - unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem); + unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem); void setCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent); unsigned _device_index; // index into the g_device array diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 41469c5ee1..d96d372354 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -1134,22 +1134,30 @@ void ihipSetTs(hipEvent_t e) // Resolve hipMemcpyDefault to a known type. -unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem) +unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem) { hipMemcpyKind kind = hipMemcpyDefault; - - if (!srcInDeviceMem && !dstInDeviceMem) { + if(!srcTracked && !dstTracked) + { kind = hipMemcpyHostToHost; - } else if (!srcInDeviceMem && dstInDeviceMem) { - kind = hipMemcpyHostToDevice; - } else if (srcInDeviceMem && !dstInDeviceMem) { - kind = hipMemcpyDeviceToHost; - } else if (srcInDeviceMem && dstInDeviceMem) { - kind = hipMemcpyDeviceToDevice; + } + if(!srcTracked && dstTracked) + { + if(dstInDeviceMem) { kind = hipMemcpyHostToDevice; } + else{ kind = hipMemcpyHostToHost; } + } + if (srcTracked && !dstTracked) { + if(srcInDeviceMem) { kind = hipMemcpyDeviceToHost; } + else { kind = hipMemcpyHostToHost; } + } + if (srcTracked && dstTracked) { + if(srcInDeviceMem && dstInDeviceMem) { kind = hipMemcpyDeviceToDevice; } + if(srcInDeviceMem && !dstInDeviceMem) { kind = hipMemcpyDeviceToHost; } + if(!srcInDeviceMem && !dstInDeviceMem) { kind = hipMemcpyHostToHost; } + if(!srcInDeviceMem && dstInDeviceMem) { kind = hipMemcpyHostToDevice; } } assert (kind != hipMemcpyDefault); - return kind; } @@ -1185,17 +1193,16 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS); bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); - // Resolve default to a specific Kind so we know which algorithm to use: if (kind == hipMemcpyDefault) { bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem); bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem); - kind = resolveMemcpyDirection(srcInDeviceMem, dstInDeviceMem); + kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem); }; hsa_signal_t depSignal; - if ((kind == hipMemcpyHostToDevice) && (!srcTracked)) { + if (kind == hipMemcpyHostToDevice) { int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyH2D); if (HIP_STAGING_BUFFERS) { tprintf(DB_COPY1, "D2H && !dstTracked: staged copy H2D dst=%p src=%p sz=%zu\n", dst, src, sizeBytes); @@ -1217,7 +1224,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const hc::am_copy(dst, src, sizeBytes); #endif } - } else if ((kind == hipMemcpyDeviceToHost) && (!dstTracked)) { + } else if (kind == hipMemcpyDeviceToHost) { int depSignalCnt = preCopyCommand(crit, NULL, &depSignal, ihipCommandCopyD2H); if (HIP_STAGING_BUFFERS) { tprintf(DB_COPY1, "D2H && !dstTracked: staged copy D2H dst=%p src=%p sz=%zu\n", dst, src, sizeBytes); @@ -1323,7 +1330,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig if (kind == hipMemcpyDefault) { bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem); bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem); - kind = resolveMemcpyDirection(srcInDeviceMem, dstInDeviceMem); + kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem); } diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index f99ca4eade..dcde66c4c7 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -156,10 +156,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) if(device){ if(flags == hipHostMallocDefault){ *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); - if(sizeBytes && (*ptr == NULL)){ + if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; }else{ - hc::am_memtracker_update(*ptr, device->_device_index, 0); + hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned); } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } else if(flags & hipHostMallocMapped){ diff --git a/projects/clr/hipamd/src/hip_peer.cpp b/projects/clr/hipamd/src/hip_peer.cpp index d45f95dc6c..900f2a6efb 100644 --- a/projects/clr/hipamd/src/hip_peer.cpp +++ b/projects/clr/hipamd/src/hip_peer.cpp @@ -40,11 +40,7 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe if (deviceId == peerDeviceId) { *canAccessPeer = 0; } else { -#if USE_PEER_TO_PEER>=2 *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); -#else - *canAccessPeer = 0; -#endif } } else { @@ -67,11 +63,7 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { -#if USE_PEER_TO_PEER>=2 bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); -#else - bool canAccessPeer = 0; -#endif if (! canAccessPeer) { err = hipErrorInvalidDevice; // P2P not allowed between these devices. } else if (thisDevice == peerDevice) { @@ -80,10 +72,8 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { -#if USE_PEER_TO_PEER>=3 // Update the peers for all memory already saved in the tracker: am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); -#endif } else { err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. } From 73e14be84b45dc4eb7017cd379c3d939ad8def20 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 16 Apr 2016 17:21:53 -0500 Subject: [PATCH 3/4] Added copyright [ROCm/clr commit: 1aac0cc9e502cd6e07c709260d1a362fc2f1ab63] --- projects/clr/hipamd/src/hip_peer.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/projects/clr/hipamd/src/hip_peer.cpp b/projects/clr/hipamd/src/hip_peer.cpp index 900f2a6efb..98bfdd0041 100644 --- a/projects/clr/hipamd/src/hip_peer.cpp +++ b/projects/clr/hipamd/src/hip_peer.cpp @@ -40,7 +40,11 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDe if (deviceId == peerDeviceId) { *canAccessPeer = 0; } else { - *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); +#if USE_PEER_TO_PEER>=2 + *canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); +#else + *canAccessPeer = 0; +#endif } } else { @@ -63,7 +67,12 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) auto thisDevice = ihipGetTlsDefaultDevice(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { - bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); +#if USE_PEER_TO_PEER>=2 + bool canAccessPeer = peerDevice->_acc.get_is_peer(thisDevice->_acc); +#else + bool canAccessPeer = 0; +#endif + if (! canAccessPeer) { err = hipErrorInvalidDevice; // P2P not allowed between these devices. } else if (thisDevice == peerDevice) { @@ -72,8 +81,10 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) LockedAccessor_DeviceCrit_t crit(thisDevice->criticalData()); bool changed = crit->removePeer(peerDevice); if (changed) { +#if USE_PEER_TO_PEER>=3 // Update the peers for all memory already saved in the tracker: am_memtracker_update_peers(thisDevice->_acc, crit->peerCnt(), crit->peerAgents()); +#endif } else { err = hipErrorPeerAccessNotEnabled; // never enabled P2P access. } From e71db391ab8db62a9b749c156ce5b935cee0b46e Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 16 Apr 2016 17:38:06 -0500 Subject: [PATCH 4/4] added memcpy and p2p test [ROCm/clr commit: e22a910ccc06ed647d301b3a099fc8edf3c4be26] --- projects/clr/hipamd/tests/src/CMakeLists.txt | 3 +- .../clr/hipamd/tests/src/hipMemcpyAll.cpp | 128 ++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 projects/clr/hipamd/tests/src/hipMemcpyAll.cpp diff --git a/projects/clr/hipamd/tests/src/CMakeLists.txt b/projects/clr/hipamd/tests/src/CMakeLists.txt index 5b1829286d..e58298248b 100644 --- a/projects/clr/hipamd/tests/src/CMakeLists.txt +++ b/projects/clr/hipamd/tests/src/CMakeLists.txt @@ -176,7 +176,7 @@ make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp) make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp) make_hip_executable (hipFuncDeviceSynchronize hipFuncDeviceSynchronize.cpp) make_hip_executable (hipPeerToPeer_simple hipPeerToPeer_simple.cpp) - +make_hip_executable (hipMemcpyAll hipMemcpyAll.cpp) make_hip_executable (hipMultiThreadDevice hipMultiThreadDevice.cpp) make_test(hip_ballot " " ) @@ -193,6 +193,7 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) make_test(hipEnvVarDriver " " ) +make_test(hipMemcpyAll " ") #TODO -reenable #make_test(hipPointerAttrib " " ) #make_test(hipMultiThreadStreams1 " " ) Fails if 0x3 specified, passes otherwise. diff --git a/projects/clr/hipamd/tests/src/hipMemcpyAll.cpp b/projects/clr/hipamd/tests/src/hipMemcpyAll.cpp new file mode 100644 index 0000000000..c8484aa32f --- /dev/null +++ b/projects/clr/hipamd/tests/src/hipMemcpyAll.cpp @@ -0,0 +1,128 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#include +#include +#include +#include"test_common.h" + +#define len 1024*1024 +#define size len * sizeof(float) + +template +void hmemset(T *ptr, T value) +{ + for(int i=0;i