From cb5cf20c21e60ca9cfb5175bf3d03ab666a03636 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 20 Jul 2017 14:41:30 +0530 Subject: [PATCH 01/52] Renable frexp(f) device math function Change-Id: I53c022b8ddf38cd17ddb42eba457b9020db66395 --- hipamd/include/hip/hcc_detail/math_functions.h | 4 ++-- hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp | 2 +- hipamd/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/math_functions.h b/hipamd/include/hip/hcc_detail/math_functions.h index 9faff2743a..79bacf274b 100644 --- a/hipamd/include/hip/hcc_detail/math_functions.h +++ b/hipamd/include/hip/hcc_detail/math_functions.h @@ -60,7 +60,7 @@ __device__ float fmaf(float x, float y, float z); __device__ float fmaxf(float x, float y); __device__ float fminf(float x, float y); __device__ float fmodf(float x, float y); -//__device__ float frexpf(float x, int* nptr); +__device__ float frexpf(float x, int* nptr); __device__ float hypotf(float x, float y); __device__ float ilogbf(float x); __device__ int isfinite(float a); @@ -146,7 +146,7 @@ __device__ double fma(double x, double y, double z); __device__ double fmax(double x, double y); __device__ double fmin(double x, double y); __device__ double fmod(double x, double y); -//__device__ double frexp(double x, int *nptr); +__device__ double frexp(double x, int *nptr); __device__ double hypot(double x, double y); __device__ double ilogb(double x); __device__ int isfinite(double x); diff --git a/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp index f4f7ab0479..0a81d111c2 100644 --- a/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp +++ b/hipamd/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp @@ -69,7 +69,7 @@ __device__ void double_precision_math_functions() fmax(0.0, 0.0); fmin(0.0, 0.0); fmod(0.0, 1.0); -// frexp(0.0, &iX); + frexp(0.0, &iX); hypot(1.0, 0.0); ilogb(1.0); isfinite(0.0); diff --git a/hipamd/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp b/hipamd/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp index de3dec35ef..a52b1a22fe 100644 --- a/hipamd/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp +++ b/hipamd/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp @@ -70,7 +70,7 @@ __device__ void single_precision_math_functions() fmaxf(0.0f, 0.0f); fminf(0.0f, 0.0f); fmodf(0.0f, 1.0f); - //frexpf(0.0f, &iX); + frexpf(0.0f, &iX); hypotf(1.0f, 0.0f); ilogbf(1.0f); isfinite(0.0f); From b3f8c3fb8e6c9724bc6d66a93d852ea7b77f48af Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 21 Jul 2017 15:50:12 -0500 Subject: [PATCH 02/52] fixed device selection during compilation to use rocm_agent_enumerator 1. Changed hipcc to use rocm_agent_enumerator 2. Changed square sample test to use device variable --- hipamd/bin/hipcc | 22 +++++++++++++++++++ .../samples/0_Intro/square/square.hipref.cpp | 5 +++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index bcd3e3a591..2dceaa295f 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -103,6 +103,28 @@ if ($HIP_PLATFORM eq "hcc") { $HIPLDFLAGS = `${HCC_HOME}/bin/hcc-config --ldflags`; + $ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator"; + + my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`; + my @agentsLine = split('\n', $myAgents); + + foreach my $val (@agentsLine) { + if($val eq "gfx701") { + $target_gfx701 = 1; + } + if($val eq "gfx801") { + $target_gfx801 = 1; + } + if($val eq "gfx802") { + $target_gfx802 = 1; + } + if($val eq "gfx803") { + $target_gfx803 = 1; + } + if($val eq "gfx900") { + $target_gfx900 = 1; + } + } #### GCC system includes workaround #### $HCC_WA_FLAGS = " "; diff --git a/hipamd/samples/0_Intro/square/square.hipref.cpp b/hipamd/samples/0_Intro/square/square.hipref.cpp index e694bfb8a4..167cb135f4 100644 --- a/hipamd/samples/0_Intro/square/square.hipref.cpp +++ b/hipamd/samples/0_Intro/square/square.hipref.cpp @@ -54,9 +54,10 @@ int main(int argc, char *argv[]) float *A_h, *C_h; size_t N = 1000000; size_t Nbytes = N * sizeof(float); - + static int device = 0; + CHECK(hipSetDevice(device)); hipDeviceProp_t props; - CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); + CHECK(hipGetDeviceProperties(&props, device/*deviceID*/)); printf ("info: running on device %s\n", props.name); #ifdef __HIP_PLATFORM_HCC__ printf ("info: architecture on AMD GPU device is: %d\n",props.gcnArch); From cb613bb6a864764d068d4aa30238d74b94aa0ecd Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 7 Oct 2017 07:46:40 -0700 Subject: [PATCH 03/52] Add copy_coherency test Test cache managemenent and synchronization between kernel and copy commands in separate queues. --- .../synchronization/copy_coherency.cpp | 391 ++++++++++++++++++ .../synchronization/memcpyInt.device.cpp | 13 + 2 files changed, 404 insertions(+) create mode 100644 hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp create mode 100644 hipamd/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp diff --git a/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp new file mode 100644 index 0000000000..2e514e1e3a --- /dev/null +++ b/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp @@ -0,0 +1,391 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//ROCM_TARGET=gfx900 hipcc --genco memcpyInt.device.cpp -o memcpyInt.hsaco +//hipcc copy_coherency.cpp -I ~/X/HIP/tests/src/ ~/X/HIP/tests/src/test_common.cpp + + +// TODO - add code object support here. +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * RUN: %t + * HIT_END + */ + + +// Test cache management (fences) and synchronization between kernel and copy commands. +// Exhaustively tests 3 command types (copy, kernel, module kernel), +// many sync types (see SyncType), followed by another command, across a sweep +// of data sizes designed to stress various levels of the memory hierarchy. + +#include "hip/hip_runtime.h" +#include "test_common.h" + +// TODO - turn this back on when test infra can copy the module files to use as test inputs. +#define SKIP_MODULE_KERNEL 1 + + +class MemcpyFunction +{ +public: + MemcpyFunction(const char *fileName, const char *functionName) { load(fileName, functionName); }; + void load(const char *fileName, const char *functionName); + void launch(int * dst, const int * src, size_t numElements, hipStream_t s); + +private: + hipFunction_t _function; + hipModule_t _module; +}; + + +void MemcpyFunction::load(const char *fileName, const char *functionName) +{ +#if SKIP_MODULE_KERNEL!=1 + HIPCHECK(hipModuleLoad(&_module, fileName)); + HIPCHECK(hipModuleGetFunction(&_function, _module, functionName)); +#endif +}; + + +void MemcpyFunction::launch(int * dst, const int * src, size_t numElements, hipStream_t s) +{ + struct { + uint32_t _hidden[6]; + int* _dst; + const int* _src; + size_t _numElements; + } args; + + args._dst = dst; + args._src = src; + args._numElements = numElements; + + size_t size = sizeof(args); + void *config[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + HIPCHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock, 1, 1, 0/*dynamicShared*/, s, NULL, (void**)&config)); +}; + +bool g_warnOnFail = true; +//int g_elementSizes[] = {1, 16, 1024, 524288, 16*1000*1000}; // TODO +int g_elementSizes[] = {128*1000, 256*1000, 16*1000*1000}; + +MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); + + + +// Set value of array to specified 32-bit integer: +__global__ void +memsetIntKernel(int * ptr, const int val, size_t numElements) +{ + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; + for (size_t i= gid; i< numElements; i+=stride){ + ptr[i] = val; + } +}; + +__global__ void +memcpyIntKernel(int *dst, const int * src, size_t numElements) +{ + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; + for (size_t i= gid; i< numElements; i+=stride){ + dst[i] = src[i]; + } +}; + + +// CHeck arrays in reverse order, to more easily detect cases where +// the copy is "partially" done. +void checkReverse(const int *ptr, int numElements, int expected) { + int mismatchCnt = 0; + for (int i=numElements-1; i>=0; i--) { + if (ptr[i] != expected) { + fprintf (stderr, "%s**error: i=%d, ptr[i] == (%x) , does not equal expected (%x)\n%s", KRED, i, ptr[i], expected, KNRM); + if (!g_warnOnFail) { + assert (ptr[i] == expected); + } + if (++mismatchCnt >= 10) { + break; + } + } + } + + fprintf (stderr, "test: OK\n"); +} + +#define ENUM_CASE_STR(x) case x: return #x + +enum CmdType { + COPY, + KERNEL, + MODULE_KERNEL, + MAX_CmdType +}; + + +const char * CmdTypeStr(CmdType c) +{ + switch(c) { + ENUM_CASE_STR(COPY); + ENUM_CASE_STR(KERNEL); + ENUM_CASE_STR(MODULE_KERNEL); + default: return "UNKNOWN"; + }; +} + + +enum SyncType { + NONE, + EVENT_QUERY, + EVENT_SYNC, + STREAM_WAIT_EVENT, + STREAM_QUERY, + STREAM_SYNC, + DEVICE_SYNC, + MAX_SyncType +}; + + +const char * SyncTypeStr(SyncType s) +{ + switch(s) { + ENUM_CASE_STR(NONE); + ENUM_CASE_STR(EVENT_QUERY); + ENUM_CASE_STR(EVENT_SYNC); + ENUM_CASE_STR(STREAM_WAIT_EVENT); + ENUM_CASE_STR(STREAM_QUERY); + ENUM_CASE_STR(STREAM_SYNC); + ENUM_CASE_STR(DEVICE_SYNC); + default: return "UNKNOWN"; + }; +}; + + +void runCmd(CmdType cmd, int *dst, const int *src, hipStream_t s, size_t numElements) +{ + switch (cmd) { + case COPY: + HIPCHECK(hipMemcpyAsync(dst, src, numElements*sizeof(int), hipMemcpyDeviceToDevice, s)); + break; + case KERNEL: + { + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, s, + dst, src, numElements); + } + break; + case MODULE_KERNEL: + g_moduleMemcpy.launch(dst, src, numElements, s); + break; + default: + failed("unknown cmd=%d type", cmd); + }; +} + +void resetInputs( int * Ad, int * Bd, int *Cd, int *Ch, size_t numElements, int expected) +{ + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), + Ad, expected, numElements); + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), + Bd, 0xDEADBEEF, numElements); // poison with bad value to ensure is overwritten correctly + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), + Bd, 0xF000BA55, numElements); // poison with bad value to ensure is overwritten correctly + memset(Ch, 13, numElements*sizeof(int)); // poison with bad value to ensure is overwritten correctly + HIPCHECK(hipDeviceSynchronize()); +} + +// Intended to test proper synchronization and cache flushing between CMDA and CMDB. +// CMD are of type CmdType. All command copy memory, using either hipMemcpyAsync or kernel implementations. +// CmdA copies from Ad to Bd, +// Some form of synchronization is applied. +// Then cmdB copies from Bd to Cd. +// +// Cd is then copied to host Ch using a memory copy. +// +// Correct result at the end is that Ch contains the contents originally in Ad (integer 0x42) +void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, + hipStream_t stream1, hipStream_t stream2, int numElements, + int * Ad, int * Bd, int *Cd, int *Ch, + int expected) +{ + hipEvent_t e; + HIPCHECK(hipEventCreateWithFlags(&e,0)); + + resetInputs(Ad, Bd, Cd, Ch, numElements, expected); + + const size_t sizeElements = numElements * sizeof(int); + fprintf (stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", + sizeElements, (double) (sizeElements/1024.0), CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType)); + + if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { + fprintf (stderr, "warn: skipping since test infra does not yet support modules\n"); + return; + } + + + // Step A: + runCmd(cmdAType, Bd, Ad, stream1, numElements); + + + // Sync in-between? + switch (syncType) { + case NONE: + break; + case EVENT_QUERY: + { + hipError_t st = hipErrorNotReady; + HIPCHECK(hipEventRecord(e, stream1)); + do { + st = hipEventQuery(e); + } while (st == hipErrorNotReady); + HIPCHECK(st); + } + break; + case EVENT_SYNC: + HIPCHECK(hipEventRecord(e, stream1)); + HIPCHECK(hipEventSynchronize(e)); + break; + case STREAM_WAIT_EVENT: + HIPCHECK(hipEventRecord(e, stream1)); + HIPCHECK(hipStreamWaitEvent(stream2, e, 0)); + break; + case STREAM_QUERY: + { + hipError_t st = hipErrorNotReady; + do { + st = hipStreamQuery(stream1); + } while (st == hipErrorNotReady); + HIPCHECK(st); + } + break; + case STREAM_SYNC: + HIPCHECK(hipStreamSynchronize(stream1)); + break; + case DEVICE_SYNC: + HIPCHECK(hipDeviceSynchronize()); + break; + default: + fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType)); + return; // FIXME, this doesn't clean up + //failed("unknown sync type=%s", SyncTypeStr(syncType)); + }; + + + runCmd(cmdBType, Cd, Bd, stream2, numElements); + + + // Copy back to host, use async copy to avoid any extra synchronization that might mask issues. + HIPCHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost, stream2)); + HIPCHECK(hipStreamSynchronize(stream2)); + + checkReverse(Ch, numElements, expected); + + HIPCHECK(hipEventDestroy(e)); +}; + + +void testWrapper(size_t numElements) +{ + const size_t sizeElements = numElements * sizeof(int); + const int expected = 0x42; + int * Ad, * Bd, *Cd, *Ch; + + HIPCHECK(hipMalloc(&Ad, sizeElements)); + HIPCHECK(hipMalloc(&Bd, sizeElements)); + HIPCHECK(hipMalloc(&Cd, sizeElements)); + HIPCHECK(hipHostMalloc(&Ch, sizeElements)); // Ch is the end array + + + + hipStream_t stream1, stream2; + + HIPCHECK(hipStreamCreate(&stream1)); + HIPCHECK(hipStreamCreate(&stream2)); + + + HIPCHECK(hipDeviceSynchronize()); + fprintf (stderr, "test: init complete, start running tests\n"); + + + runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); + + for (int cmdA=0; cmdA + + + +extern "C" __global__ void +memcpyIntKernel(hipLaunchParm lp, int *dst, const int * src, size_t numElements) +{ + int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + int stride = hipBlockDim_x * hipGridDim_x ; + for (size_t i= gid; i< numElements; i+=stride){ + dst[i] = src[i]; + } +}; From c4612f3dc9e6717535578f6fbba56b9d6d98a2d2 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 7 Oct 2017 07:52:22 -0700 Subject: [PATCH 04/52] Test cleanup. --- .../runtimeApi/memory/p2p_copy_coherency.cpp | 31 ++++++++++++++----- hipamd/tests/src/test_common.h | 1 - 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index 0c2c387c2f..09c1fb28bb 100644 --- a/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -33,11 +33,13 @@ THE SOFTWARE. #ifdef __HIP_PLATFORM_HCC__ #include -#define USE_HCC_MEMTRACKER 0 #endif +#define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */ -int elementSizes[] = {16, 1024,524288}; + + +int elementSizes[] = {1, 16, 1024, 524288, 16*1000*1000}; int nSizes = sizeof(elementSizes) / sizeof(int); int enablePeers(int dev0, int dev1) @@ -57,8 +59,9 @@ int enablePeers(int dev0, int dev1) return 0; }; +// Set value of array to specified 32-bit integer: __global__ void -memsetIntKernel(/*hipLaunchParm lp,*/ int * ptr, const int val, size_t numElements) +memsetIntKernel(int * ptr, const int val, size_t numElements) { int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); int stride = hipBlockDim_x * hipGridDim_x ; @@ -68,7 +71,7 @@ memsetIntKernel(/*hipLaunchParm lp,*/ int * ptr, const int val, size_t numElemen }; __global__ void -memcpyIntKernel(/*hipLaunchParm lp, */const int * src, int* dst, size_t numElements) +memcpyIntKernel(const int * src, int* dst, size_t numElements) { int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); int stride = hipBlockDim_x * hipGridDim_x ; @@ -77,6 +80,9 @@ memcpyIntKernel(/*hipLaunchParm lp, */const int * src, int* dst, size_t numEleme } }; + +// CHeck arrays in reverse order, to more easily detect cases where +// the copy is "partially" done. void checkReverse(const int *ptr, int numElements, int expected) { for (int i=numElements-1; i>=0; i--) { if (ptr[i] != expected) { @@ -88,7 +94,8 @@ void checkReverse(const int *ptr, int numElements, int expected) { printf ("test: OK\n"); } -void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_t gpu1Stream, int numElements, + +void runTestImpl(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_t gpu1Stream, int numElements, int * dataGpu0_0, int * dataGpu0_1, int *dataGpu1, int *dataHost, int expected) { hipEvent_t e; @@ -96,7 +103,7 @@ void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_ HIPCHECK(hipEventCreateWithFlags(&e,0)); } const size_t sizeElements = numElements * sizeof(int); - printf ("test: runTest with %zu bytes %s with hostSync %s\n", sizeElements, stepAIsCopy ? "copy" : "kernel", hostSync ? "enabled" : "disabled"); + printf ("test: runTestImpl with %zu bytes %s with hostSync %s\n", sizeElements, stepAIsCopy ? "copy" : "kernel", hostSync ? "enabled" : "disabled"); hipStream_t stepAStream = gpu0Stream; @@ -127,6 +134,9 @@ void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_ HIPCHECK(hipStreamSynchronize(gpu0Stream)); checkReverse(dataHost, numElements, expected); + if(!hostSync) { + HIPCHECK(hipEventDestroy(e)); + } } void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMemcpy) @@ -163,12 +173,17 @@ void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMe #endif printf (" test: init complete\n"); - runTest(useMemcpy , hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0,dataGpu0_1, dataGpu1, dataHost, expected); + int stepAIsCopy = 0; + int stepBIsCopy = 1; + runTestImpl(true, hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0,dataGpu0_1, dataGpu1, dataHost, expected); HIPCHECK(hipFree(dataGpu0_0)); HIPCHECK(hipFree(dataGpu0_1)); HIPCHECK(hipFree(dataGpu1)); HIPCHECK(hipHostFree(dataHost)); + + HIPCHECK(hipStreamDestroy(gpu0Stream)); + HIPCHECK(hipStreamDestroy(gpu1Stream)); }; int main(int argc, char *argv[]) @@ -192,7 +207,7 @@ int main(int argc, char *argv[]) return -1; }; - for(int index = 1;index < nSizes;index++) { + for(int index = 0;index < nSizes;index++) { testMultiGpu(dev0, dev1, elementSizes[index] , false /* GPU Synchronization*/, true); testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, true); testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, false); diff --git a/hipamd/tests/src/test_common.h b/hipamd/tests/src/test_common.h index e3cc664d56..35f0aedb25 100644 --- a/hipamd/tests/src/test_common.h +++ b/hipamd/tests/src/test_common.h @@ -23,7 +23,6 @@ THE SOFTWARE. #include #include "hip/hip_runtime.h" -#include "hip/hip_texture_types.h" #include "hip/hip_runtime_api.h" #define HC __attribute__((hc)) From e738fa66b5af4c9c8c1d1578825dfca5d8b55b69 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 14:49:29 +0000 Subject: [PATCH 05/52] Modify device properties to use pool API. - Also better error code checking --- hipamd/src/hip_hcc.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 2c98ac804f..aa8dbd0072 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -695,26 +695,26 @@ int checkAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool) return access; } -hsa_status_t get_region_info(hsa_region_t region, void* data) +hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { hsa_status_t err; hipDeviceProp_t* p_prop = reinterpret_cast(data); uint32_t region_segment; - // Get region segment - err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment); + // Get pool segment + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, ®ion_segment); ErrorCheck(err); switch(region_segment) { case HSA_REGION_SEGMENT_READONLY: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break; - /* case HSA_REGION_SEGMENT_PRIVATE: - cout<<"PRIVATE"<totalConstMem)); break; case HSA_REGION_SEGMENT_GROUP: - err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->sharedMemPerBlock)); break; + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock)); + printf ("shared_mem err=%d mem=%zu\n", err, p_prop->sharedMemPerBlock); + break; default: break; } - return HSA_STATUS_SUCCESS; + return err; } @@ -748,8 +748,10 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) // Set some defaults in case we don't find the appropriate regions: prop->totalGlobalMem = 0; prop->totalConstMem = 0; - prop-> maxThreadsPerMultiProcessor = 0; + prop->maxThreadsPerMultiProcessor = 0; prop->regsPerBlock = 0; + prop->totalConstMem = 0; + prop->sharedMemPerBlock = 0; if (_hsaAgent.handle == -1) { return hipErrorInvalidDevice; @@ -849,15 +851,18 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu; // Get memory properties - err = hsa_agent_iterate_regions(_hsaAgent, get_region_info, prop); + err = hsa_amd_agent_iterate_memory_pools(_hsaAgent, get_pool_info, prop); + if (err == HSA_STATUS_INFO_BREAK) { + err = HSA_STATUS_SUCCESS; + } DeviceErrorCheck(err); - // Get the size of the region we are using for Accelerator Memory allocations: + // Get the size of the pool we are using for Accelerator Memory allocations: hsa_region_t *am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem); DeviceErrorCheck(err); // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size. + // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group pool size. prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; // Get Max memory clock frequency From ed85b15c3e0afb93406ab74cdfb870931af5f3f8 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 15:50:18 +0000 Subject: [PATCH 06/52] Update device properties. - clear properties to defined initial state. - enable some property flags which are now supported. --- hipamd/include/hip/hcc_detail/hip_runtime.h | 3 +-- hipamd/src/hip_hcc.cpp | 12 +++--------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 379fc05f5b..9fcc02b2d7 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -108,13 +108,12 @@ extern int HIP_TRACE_API; #if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0) // Device compile and not host compile: -//TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these. // 32-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) // 64-bit Atomics: #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index aa8dbd0072..d2d28c2a93 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -745,13 +745,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) hipError_t e = hipSuccess; hsa_status_t err; - // Set some defaults in case we don't find the appropriate regions: - prop->totalGlobalMem = 0; - prop->totalConstMem = 0; - prop->maxThreadsPerMultiProcessor = 0; - prop->regsPerBlock = 0; - prop->totalConstMem = 0; - prop->sharedMemPerBlock = 0; + memset(prop, 0, sizeof(hipDeviceProp_t)); if (_hsaAgent.handle == -1) { return hipErrorInvalidDevice; @@ -882,7 +876,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasGlobalFloatAtomicExch = 1; prop->arch.hasSharedInt32Atomics = 1; prop->arch.hasSharedFloatAtomicExch = 1; - prop->arch.hasFloatAtomicAdd = 0; + prop->arch.hasFloatAtomicAdd = 1; // supported with CAS loop, but is supported prop->arch.hasGlobalInt64Atomics = 1; prop->arch.hasSharedInt64Atomics = 1; prop->arch.hasDoubles = 1; @@ -890,7 +884,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) prop->arch.hasWarpBallot = 1; prop->arch.hasWarpShuffle = 1; prop->arch.hasFunnelShift = 0; // TODO-hcc - prop->arch.hasThreadFenceSystem = 0; // TODO-hcc + prop->arch.hasThreadFenceSystem = 1; prop->arch.hasSyncThreadsExt = 0; // TODO-hcc prop->arch.hasSurfaceFuncs = 0; // TODO-hcc prop->arch.has3dGrid = 1; From b4c7876244dd35b6ef2e36aa90ef37abb2d55fac Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 20 Oct 2017 13:24:04 -0700 Subject: [PATCH 07/52] Remove printf --- hipamd/src/hip_hcc.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index d2d28c2a93..6a614b1826 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -710,7 +710,6 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->totalConstMem)); break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock)); - printf ("shared_mem err=%d mem=%zu\n", err, p_prop->sharedMemPerBlock); break; default: break; } From da7c37947c81e214fd8aac4072a80fed76a5d4ff Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Mon, 23 Oct 2017 16:47:40 -0400 Subject: [PATCH 08/52] replace __hcc_workweek__ with HC_FEATURE_PRINTF flag --- hipamd/src/hip_hcc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 2d67c31fe7..ae53d7ae45 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -616,7 +616,7 @@ void ihipDevice_t::locked_reset() //FIXME - Calling am_memtracker_reset is really bad since it destroyed all buffers allocated by the HCC runtime as well //such as the printf buffer. Re-initialze the printf buffer as a workaround for now. -#if (__hcc_workweek__ >= 17423) +#ifdef HC_FEATURE_PRINTF Kalmar::getContext()->initPrintfBuffer(); #endif }; From 36c2795dd4045b74fe38f6429bcecf098bfd80fd Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Mon, 23 Oct 2017 16:59:14 -0400 Subject: [PATCH 09/52] add a printf device function --- hipamd/include/hip/hcc_detail/hip_runtime.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 379fc05f5b..07c7a0d4b3 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -50,10 +50,16 @@ THE SOFTWARE. #include +// define HIP_ENABLE_PRINTF to enable printf +#ifdef HIP_ENABLE_PRINTF + #define HCC_ENABLE_ACCELERATOR_PRINTF 1 +#endif + //--- // Remainder of this file only compiles with HCC #if defined __HCC__ #include +#include "hc_printf.hpp" //TODO-HCC-GL - change this to typedef. //typedef grid_launch_parm hipLaunchParm ; @@ -420,6 +426,20 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) } +#ifdef __HCC_ACCELERATOR__ + +#ifdef HC_FEATURE_PRINTF +template +static inline __device__ void printf(const char* format, All... all) { + hc::printf(format, all...); +} +#else +template +static inline __device__ void printf(const char* format, All... all) { } +#endif + +#endif + #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) From f1992c5b113b6c271b9b135cef9c2a29e8317384 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Mon, 23 Oct 2017 18:31:13 -0400 Subject: [PATCH 10/52] add an unit test for printf --- hipamd/tests/src/kernel/hipPrintfKernel.cpp | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 hipamd/tests/src/kernel/hipPrintfKernel.cpp diff --git a/hipamd/tests/src/kernel/hipPrintfKernel.cpp b/hipamd/tests/src/kernel/hipPrintfKernel.cpp new file mode 100644 index 0000000000..482098fd54 --- /dev/null +++ b/hipamd/tests/src/kernel/hipPrintfKernel.cpp @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#define HIP_ENABLE_PRINTF + +#include"test_common.h" + +__global__ void run_printf(hipLaunchParm lp){ + printf("Hello World\n"); +} + +int main(){ +hipLaunchKernel(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0); +hipDeviceSynchronize(); +passed(); +} From 4ed3e9e15f44670487becde77f9d6846fd1eb55c Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Tue, 24 Oct 2017 00:55:41 -0400 Subject: [PATCH 11/52] printf support for module API --- hipamd/src/hip_module.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 74b0092b84..85c88a1826 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -259,6 +259,11 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname){ status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &(*module)->executable); CHECKLOG_HSA(status, hipErrorNotInitialized); + status = hsa_executable_agent_global_variable_define((*module)->executable, agent, + "_ZN2hc13printf_bufferE", + Kalmar::getContext()->getPrintfBufferPointerVA()); + CHECKLOG_HSA(status, hipErrorNotInitialized); + status = hsa_executable_load_code_object((*module)->executable, agent, (*module)->object, NULL); CHECKLOG_HSA(status, hipErrorNotInitialized); From 085d63099c2767323a410d29d7a5a2916a92178c Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Wed, 25 Oct 2017 12:00:02 -0400 Subject: [PATCH 12/52] add HC_FEATURE_PRINTF around the printf buffer definition --- hipamd/src/hip_module.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 85c88a1826..962b6084f1 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -259,10 +259,12 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname){ status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &(*module)->executable); CHECKLOG_HSA(status, hipErrorNotInitialized); +#ifdef HC_FEATURE_PRINTF status = hsa_executable_agent_global_variable_define((*module)->executable, agent, "_ZN2hc13printf_bufferE", Kalmar::getContext()->getPrintfBufferPointerVA()); CHECKLOG_HSA(status, hipErrorNotInitialized); +#endif status = hsa_executable_load_code_object((*module)->executable, agent, (*module)->object, NULL); CHECKLOG_HSA(status, hipErrorNotInitialized); From cc2bcbcbcfec814c272daf9491629f8bc4763852 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 25 Oct 2017 16:02:18 -0500 Subject: [PATCH 13/52] Clean up test to address review feedback. --- .../tests/src/runtimeApi/memory/p2p_copy_coherency.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index 09c1fb28bb..cad5065e78 100644 --- a/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/hipamd/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -139,7 +139,7 @@ void runTestImpl(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStr } } -void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMemcpy) +void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync) { const size_t sizeElements = numElements * sizeof(int); @@ -173,8 +173,6 @@ void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMe #endif printf (" test: init complete\n"); - int stepAIsCopy = 0; - int stepBIsCopy = 1; runTestImpl(true, hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0,dataGpu0_1, dataGpu1, dataHost, expected); HIPCHECK(hipFree(dataGpu0_0)); @@ -208,10 +206,8 @@ int main(int argc, char *argv[]) }; for(int index = 0;index < nSizes;index++) { - testMultiGpu(dev0, dev1, elementSizes[index] , false /* GPU Synchronization*/, true); - testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, true); - testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, false); - testMultiGpu(dev0, dev1, elementSizes[index] , false /*Host Synchronization*/, false); + testMultiGpu(dev0, dev1, elementSizes[index] , false /*GPU Synchronization*/); + testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/); } From 698721be345db59bc50dd98dcb25c42a871d5128 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Wed, 25 Oct 2017 22:16:37 +0000 Subject: [PATCH 14/52] Enhance debug for copy pointers - show more pointer tracking fields - show pointer info before and after "tailoring' --- hipamd/src/hip_hcc.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 2d67c31fe7..c3d824dbaf 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1805,7 +1805,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, } } else { *forceUnpinnedCopy = true; - tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n", + tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n", *copyDevice ? (*copyDevice)->getDeviceNum() : -1, *copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0); if (HIP_FAIL_SOC & 0x2) { @@ -1820,10 +1820,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo) { - tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n", + tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n", tag, ptr, ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes, - ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged); + ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged, + ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr); } @@ -1871,12 +1872,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte }; -bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes) +bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes) { bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS); + printPointerInfo(DB_COPY, tag, ptr, *ptrInfo); if (tracked) { tailorPtrInfo(ptrInfo, ptr, sizeBytes); + printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo); } return tracked; @@ -1906,8 +1909,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); #endif - bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); - bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); + bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes); + bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes); // Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here: @@ -2034,21 +2037,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); #endif - bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); - bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); + tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes); + bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes); + bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes); hc::hcCommandKind hcCopyDir; ihipCtx_t *copyDevice; bool forceUnpinnedCopy; resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device, &forceUnpinnedCopy); - tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n", + tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n", copyDevice ? copyDevice->getDeviceNum():-1, - dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, - src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, - sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); - printPointerInfo(DB_COPY, " src", src, srcPtrInfo); + hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. From a4172415071519576de9864b0647267ce4dfef35 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 26 Oct 2017 16:26:25 +0000 Subject: [PATCH 15/52] Fix bug with peer-to-peer combined with context API - Store context inside the tracker rather than using int deviceID that was always mapped to primary context - IsPeerWatcher now based on device IDs rather than specific peers. --- hipamd/src/hip_hcc.cpp | 37 ++++++++++++++++++++++++++++++----- hipamd/src/hip_hcc_internal.h | 11 ++++++++++- hipamd/src/hip_memory.cpp | 8 ++++++++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index c3d824dbaf..bcb56d5479 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -47,6 +47,9 @@ THE SOFTWARE. #include "trace_helper.h" #include "env.h" +//TODO - create a stream-based debug interface as an additional option for tprintf +#define DB_PEER_CTX 0 + //================================================================================================= //Global variables: @@ -459,8 +462,20 @@ void ihipCtxCriticalBase_t::recomputePeerAgents() template<> bool ihipCtxCriticalBase_t::isPeerWatcher(const ihipCtx_t *peer) { - auto match = std::find(_peers.begin(), _peers.end(), peer); + auto match = std::find_if(_peers.begin(), _peers.end(), + [=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); }); + return (match != std::end(_peers)); + +#if 0 + for (auto pi=_peers.begin(); pi != _peers.end(); pi++) { + if ((*pi)->getDeviceNum() == peer->getDeviceNum()) { + return true; + } + } + + return false; +#endif } @@ -1677,18 +1692,24 @@ const char *ihipErrorString(hipError_t hip_error) // So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) { - // Make sure this is a device-to-device copy with all memory available to the requested copy engine // // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: if (dstPtrInfo->_sizeBytes == 0) { return false; } else { +#if USE_APP_PTR_FOR_CTX + ihipCtx_t *dstCtx = static_cast (dstPtrInfo->_appPtr); +#else ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId); +#endif if (copyEngineCtx != dstCtx) { // Only checks peer list if contexts are different LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); - //tprintf(DB_SYNC, "dstCrit lock succeeded\n"); +#if DB_PEER_CTX + std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt=" + << ctxCrit->peerCnt() << "\n"; +#endif if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { return false; }; @@ -1696,16 +1717,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin } - // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: if (srcPtrInfo->_sizeBytes == 0) { return false; } else { +#if USE_APP_PTR_FOR_CTX + ihipCtx_t *srcCtx = static_cast (srcPtrInfo->_appPtr); +#else ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId); +#endif if (copyEngineCtx != srcCtx) { // Only checks peer list if contexts are different LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); - //tprintf(DB_SYNC, "srcCrit lock succeeded\n"); +#if DB_PEER_CTX + std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt=" + << ctxCrit->peerCnt() << "\n"; +#endif if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { return false; }; diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 4b7e533a4c..a48a243a3b 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -32,10 +32,19 @@ THE SOFTWARE. #include "env.h" -#if defined(__HCC__) && (__hcc_workweek__ < 16354) +#if (__hcc_workweek__ < 16354) #error("This version of HIP requires a newer version of HCC."); #endif +// Use the __appPtr field in the am memtracker to store the context. +// Requires a bug fix in HCC +#if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0) +#define USE_APP_PTR_FOR_CTX 1 +#endif + + + + #define USE_IPC 1 //--- diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index a8324c5729..5a4b5f4b4e 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -61,7 +61,11 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags) auto device = ctx->getWriteableDevice(); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx); +#else hc::am_memtracker_update(ptr, device->_deviceId, hipFlags); +#endif if (shareWithAll) { hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr); @@ -660,7 +664,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) vecAcc.push_back(ihipGetDevice(i)->_acc); } am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx); +#else hc::am_memtracker_update(hostPtr, device->_deviceId, flags); +#endif tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size()); if(am_status == AM_SUCCESS){ From 71434383a3a8e5d703e90de15d1e3bb30bc30730 Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Mon, 30 Oct 2017 14:34:24 +0530 Subject: [PATCH 16/52] [newTests]Adding tests for device APIs --- .../device/hipDeviceComputeCapability.cpp | 46 ++++++++++++ .../runtimeApi/device/hipDeviceGetName.cpp | 45 +++++++++++ .../device/hipDeviceSynchronize.cpp | 74 +++++++++++++++++++ .../runtimeApi/device/hipDeviceTotalMem.cpp | 45 +++++++++++ .../src/runtimeApi/device/hipGetDevice.cpp | 44 +++++++++++ .../device/hipRuntimeGetVersion.cpp | 39 ++++++++++ .../runtimeApi/device/hipSetCachceConfig.cpp | 36 +++++++++ .../src/runtimeApi/device/hipSetDevice.cpp | 36 +++++++++ .../runtimeApi/device/hipSetDeviceFlags.cpp | 52 +++++++++++++ 9 files changed, 417 insertions(+) create mode 100644 hipamd/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipDeviceTotalMem.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipGetDevice.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipRuntimeGetVersion.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipSetCachceConfig.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipSetDevice.cpp create mode 100644 hipamd/tests/src/runtimeApi/device/hipSetDeviceFlags.cpp diff --git a/hipamd/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp b/hipamd/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp new file mode 100644 index 0000000000..5c9af3d762 --- /dev/null +++ b/hipamd/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp @@ -0,0 +1,46 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + int numDevices = 0; + int major,minor; + hipDevice_t device; + HIPCHECK(hipGetDeviceCount(&numDevices)); + for(int i=0;i= 0); + HIPASSERT(minor >= 0); + } + passed(); +} diff --git a/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp new file mode 100644 index 0000000000..94de10ff4c --- /dev/null +++ b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp @@ -0,0 +1,45 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + int numDevices = 0; + char name[256]; + hipDevice_t device; + HIPCHECK(hipGetDeviceCount(&numDevices)); + for(int i=0;i +#include +#include"test_common.h" + +int main(){ + hipFuncCache_t cacheConfig; + void *func; + hipFuncSetCacheConfig(func, cacheConfig); + passed(); +} + diff --git a/hipamd/tests/src/runtimeApi/device/hipSetDevice.cpp b/hipamd/tests/src/runtimeApi/device/hipSetDevice.cpp new file mode 100644 index 0000000000..b1b7cac12d --- /dev/null +++ b/hipamd/tests/src/runtimeApi/device/hipSetDevice.cpp @@ -0,0 +1,36 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t EXCLUDE_HIP_PLATFORM + * HIT_END + */ + +#include "test_common.h" + +int main(){ + int numDevices = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + for(int i=0;i Date: Mon, 30 Oct 2017 16:58:03 +0000 Subject: [PATCH 17/52] Check for null copyEngine before looking at peers. --- hipamd/src/hip_hcc.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index bcb56d5479..554ea90d15 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -466,16 +466,6 @@ bool ihipCtxCriticalBase_t::isPeerWatcher(const ihipCtx_t *peer) [=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); }); return (match != std::end(_peers)); - -#if 0 - for (auto pi=_peers.begin(); pi != _peers.end(); pi++) { - if ((*pi)->getDeviceNum() == peer->getDeviceNum()) { - return true; - } - } - - return false; -#endif } @@ -1692,6 +1682,10 @@ const char *ihipErrorString(hipError_t hip_error) // So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) { + if (copyEngineCtx == nullptr) { + return false; + } + // Make sure this is a device-to-device copy with all memory available to the requested copy engine // // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: From b98dede49a96299d61bc49a3942f3e904ba348ee Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Mon, 30 Oct 2017 14:02:50 -0500 Subject: [PATCH 18/52] Inital codes to remove x86_64 dependency in HIP source build --- hipamd/packaging/hip_base.txt | 2 +- hipamd/packaging/hip_doc.txt | 2 +- hipamd/packaging/hip_hcc.txt | 2 +- hipamd/packaging/hip_nvcc.txt | 2 +- hipamd/packaging/hip_samples.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hipamd/packaging/hip_base.txt b/hipamd/packaging/hip_base.txt index 836a82657b..f77e9ba3e8 100644 --- a/hipamd/packaging/hip_base.txt +++ b/hipamd/packaging/hip_base.txt @@ -28,7 +28,7 @@ set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0)") set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/hipamd/packaging/hip_doc.txt b/hipamd/packaging/hip_doc.txt index 6f602c84cf..daef7810b4 100644 --- a/hipamd/packaging/hip_doc.txt +++ b/hipamd/packaging/hip_doc.txt @@ -33,7 +33,7 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION})") set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") diff --git a/hipamd/packaging/hip_hcc.txt b/hipamd/packaging/hip_hcc.txt index 284d97e2e5..47ad89450c 100644 --- a/hipamd/packaging/hip_hcc.txt +++ b/hipamd/packaging/hip_hcc.txt @@ -37,7 +37,7 @@ else() set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@)") endif() set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/hipamd/packaging/hip_nvcc.txt b/hipamd/packaging/hip_nvcc.txt index 0d7c357623..254b7a956a 100644 --- a/hipamd/packaging/hip_nvcc.txt +++ b/hipamd/packaging/hip_nvcc.txt @@ -20,7 +20,7 @@ set(CPACK_BINARY_DEB "ON") #set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), cuda (>= 7.5)") set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") #set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") #set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/hipamd/packaging/hip_samples.txt b/hipamd/packaging/hip_samples.txt index 6d34a6fd40..c1707e42f1 100644 --- a/hipamd/packaging/hip_samples.txt +++ b/hipamd/packaging/hip_samples.txt @@ -21,7 +21,7 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION})") set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") +set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") From dc7d993a02cc04eb7f9e6a935614c1f6c3b41976 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 30 Oct 2017 20:18:41 +0000 Subject: [PATCH 19/52] Add ns-level timer for HIP API routines Refactor some miuses of ihipLogStatus, these should only be in top-level HIP APIs and should be paired with HIP_API_INIT calls. --- hipamd/src/hip_device.cpp | 10 +++++----- hipamd/src/hip_hcc.cpp | 8 ++++++-- hipamd/src/hip_hcc_internal.h | 16 ++++++++++++---- hipamd/src/hip_module.cpp | 12 +++++++----- hipamd/src/hip_stream.cpp | 1 - 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index ff511b5509..521b56b0e9 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -60,12 +60,12 @@ hipError_t ihipGetDeviceCount(int *count) *count = g_deviceCnt; if (*count > 0) { - e = ihipLogStatus(hipSuccess); + e = hipSuccess; } else { - e = ihipLogStatus(hipErrorNoDevice); + e = hipErrorNoDevice; } } else { - e = ihipLogStatus(hipErrorInvalidValue); + e = hipErrorInvalidValue; } return e; } @@ -73,7 +73,7 @@ hipError_t ihipGetDeviceCount(int *count) hipError_t hipGetDeviceCount(int *count) { HIP_INIT_API(count); - return ihipGetDeviceCount(count); + return ihipLogStatus(ihipGetDeviceCount(count)); } hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) @@ -205,7 +205,7 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device hipError_t e = hipSuccess; if(pi == nullptr) { - return ihipLogStatus(hipErrorInvalidValue); + return hipErrorInvalidValue; } auto * hipDevice = ihipGetDevice(device); diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 915732bdce..611079f7c2 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -156,7 +156,7 @@ thread_local TidInfo tls_tidInfo; //================================================================================================= // Top-level "free" functions: //================================================================================================= -void recordApiTrace(std::string *fullStr, const std::string &apiStr) +uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr) { auto apiSeqNum = tls_tidInfo.apiSeqNum(); auto tid = tls_tidInfo.tid(); @@ -178,10 +178,14 @@ void recordApiTrace(std::string *fullStr, const std::string &apiStr) *fullStr += " "; *fullStr += apiStr; + uint64_t apiStartTick = getTicks(); + if (COMPILE_HIP_DB && HIP_TRACE_API) { - fprintf (stderr, "%s<c_str(), API_COLOR_END); + fprintf (stderr, "%s<c_str(), apiStartTick, API_COLOR_END); } + + return apiStartTick; } diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 4b7e533a4c..197cd35bfa 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -244,23 +244,28 @@ static const DbName dbName [] = #endif - +static inline uint64_t getTicks() +{ + return hc::get_system_ticks(); +} //--- -extern void recordApiTrace(std::string *fullStr, const std::string &apiStr); +extern uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr); #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) #define API_TRACE(forceTrace, ...)\ +uint64_t hipApiStartTick;\ {\ tls_tidInfo.incApiSeqNum();\ if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\ + auto ticks = getTicks() - hipApiStartTick;\ + fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \ + (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), \ + __func__, localHipStatus, ihipErrorString(localHipStatus), ticks, API_COLOR_END);\ }\ if (HIP_PROFILE_API) { MARKER_END(); }\ localHipStatus;\ diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index dee69e7ba0..e9e572af9b 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -374,6 +374,8 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname) hipError_t hipModuleUnload(hipModule_t hmod) { + HIP_INIT_API(hmod); + // TODO - improve this synchronization so it is thread-safe. // Currently we want for all inflight activity to complete, but don't prevent another // thread from launching new kernels before we finish this operation. @@ -408,7 +410,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char hipError_t ret = hipSuccess; if (name == nullptr){ - return ihipLogStatus(hipErrorInvalidValue); + return (hipErrorInvalidValue); } if (ctx == nullptr){ @@ -431,7 +433,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char hsa_executable_symbol_t symbol; status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol); if(status != HSA_STATUS_SUCCESS){ - return ihipLogStatus(hipErrorNotFound); + return hipErrorNotFound; } status = hsa_executable_symbol_get_info(symbol, @@ -503,11 +505,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){ kernArgSize = *(size_t*)(config[3]); } else { - return ihipLogStatus(hipErrorNotInitialized); + return hipErrorNotInitialized; } }else{ - return ihipLogStatus(hipErrorInvalidValue); + return hipErrorInvalidValue; } @@ -624,7 +626,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, } else{ hipFunction_t func; - ihipModuleGetSymbol(&func, hmod, name); + ret = ihipModuleGetSymbol(&func, hmod, name); *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); *dptr = reinterpret_cast(func->_object); return ihipLogStatus(ret); diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 7dd6efd39c..f62be31b9e 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -140,7 +140,6 @@ hipError_t hipStreamQuery(hipStream_t stream) //--- hipError_t hipStreamSynchronize(hipStream_t stream) { - HIP_INIT_API(stream); HIP_INIT_SPECIAL_API(TRACE_SYNC, stream); hipError_t e = hipSuccess; From cb454d5366871ec135022bd4b23243b9c83caf01 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Tue, 31 Oct 2017 18:14:07 +0000 Subject: [PATCH 20/52] use libc++ on centos 7 and rhel 7.4 --- hipamd/bin/hipcc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 8f18ab0d8e..bd01177a77 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -127,7 +127,9 @@ if ($HIP_PLATFORM eq "hcc") { # Force -stdlib=libc++ on UB14.04 $HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`; - if ($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"") { + if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"") + or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"") + or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) { $HIPCXXFLAGS .= " -stdlib=libc++"; $setStdLib = 1; } From c1a4ff7b20ddde59889ffbd6d5d5f986da24f6d4 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Tue, 31 Oct 2017 18:16:12 +0000 Subject: [PATCH 21/52] Centos/RHEL - remove usage of constexpr since libc++ doesn't enable ctor for constexpr pair in C++11 --- hipamd/src/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index dee69e7ba0..da889a2e03 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -217,7 +217,7 @@ namespace using namespace ELFIO; using namespace std; - static constexpr pair r{0, 0}; + static const pair r{0, 0}; for (auto i = 0u; i != section.get_symbols_num(); ++i) { // TODO: this is boyscout code, caching the temporaries From 172b9f96e657dda0bf3a61cb574fbbf7c28e536d Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Tue, 31 Oct 2017 22:26:26 +0000 Subject: [PATCH 22/52] Add HIP_DB=warn + message if sync on dangerous event. --- hipamd/src/hip_event.cpp | 8 ++++++++ hipamd/src/hip_hcc_internal.h | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index d1ee37a45e..93ef63f2e3 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -191,6 +191,10 @@ hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_SPECIAL_API(TRACE_SYNC, event); + if (!(event->_flags & hipEventReleaseToSystem)) { + tprintf(DB_WARN, "hipEventSynchronize on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); + } + if (event) { if (event->_state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidResourceHandle); @@ -268,6 +272,10 @@ hipError_t hipEventQuery(hipEvent_t event) { HIP_INIT_SPECIAL_API(TRACE_QUERY, event); + if (!(event->_flags & hipEventReleaseToSystem)) { + tprintf(DB_WARN, "hipEventQuery on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); + } + if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) { return ihipLogStatus(hipErrorNotReady); } else { diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 197cd35bfa..27c323887c 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -209,7 +209,8 @@ extern const char *API_COLOR_END; #define DB_SYNC 1 /* 0x02 - trace synchronization pieces */ #define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */ #define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */ -#define DB_MAX_FLAG 4 +#define DB_WARN 4 /* 0x10 - warn about sub-optimal or shady behavior */ +#define DB_MAX_FLAG 5 // When adding a new debug flag, also add to the char name table below. // // @@ -226,6 +227,7 @@ static const DbName dbName [] = {KYEL, "sync"}, {KCYN, "mem"}, {KMAG, "copy"}, + {KRED, "warn"}, }; From 58a18eaf0cc03fbcceb60795f3002280ca73e0ae Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 1 Nov 2017 22:51:22 +0000 Subject: [PATCH 23/52] This corrects some interesting choices that were present in the HIP unit tests such as e.g. de-allocating memory allocated with new[] using free. All of these were identified via cppcheck. --- hipamd/tests/src/context/hipMemsetD8.cpp | 3 +- hipamd/tests/src/deviceLib/hipTestDevice.cpp | 141 +++++++++++++++--- .../src/deviceLib/hipTestDeviceDouble.cpp | 124 +++++++++++++-- .../src/experimental/xcompile/hipxxKer.cpp | 10 +- .../src/kernel/hipLanguageExtensions.cpp | 4 +- hipamd/tests/src/kernel/hipTestMemKernel.cpp | 45 ++++-- hipamd/tests/src/stress/hipStressKernel.cpp | 3 + 7 files changed, 278 insertions(+), 52 deletions(-) diff --git a/hipamd/tests/src/context/hipMemsetD8.cpp b/hipamd/tests/src/context/hipMemsetD8.cpp index 3730fcb70b..a356d05b76 100644 --- a/hipamd/tests/src/context/hipMemsetD8.cpp +++ b/hipamd/tests/src/context/hipMemsetD8.cpp @@ -46,7 +46,6 @@ int main(int argc, char *argv[]) A_h = new char[Nbytes]; HIPCHECK ( hipMalloc((void **) &A_d, Nbytes) ); - A_h = (char*)malloc(Nbytes); printf ("Size=%zu memsetval=%2x \n", Nbytes, memsetval); HIPCHECK ( hipMemsetD8(A_d, memsetval, Nbytes) ); @@ -61,7 +60,7 @@ int main(int argc, char *argv[]) } hipFree((void *) A_d); - free(A_h); + delete [] A_h; passed(); } diff --git a/hipamd/tests/src/deviceLib/hipTestDevice.cpp b/hipamd/tests/src/deviceLib/hipTestDevice.cpp index 570f3baaf0..fa85940839 100644 --- a/hipamd/tests/src/deviceLib/hipTestDevice.cpp +++ b/hipamd/tests/src/deviceLib/hipTestDevice.cpp @@ -139,7 +139,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -174,7 +181,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -205,7 +219,13 @@ for(int i=0;i<512;i++){ } } -free(A); +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -234,7 +254,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -263,7 +288,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -291,7 +321,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -321,7 +356,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -350,7 +390,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -387,7 +432,16 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); + if(passed == 1){ return true; } @@ -427,7 +481,18 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +delete [] E; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); +hipFree(Ed); + if(passed == 1){ return true; } @@ -457,7 +522,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -489,7 +559,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -525,7 +602,16 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); + if(passed == 1){ return true; } @@ -565,7 +651,18 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +delete [] E; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); +hipFree(Ed); + if(passed == 1){ return true; } @@ -595,7 +692,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -622,7 +724,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -631,7 +738,7 @@ return false; } int main(){ -if(run_sincosf() && run_sincospif() && run_fdividef() && +if(run_sincosf() && run_sincospif() && run_fdividef() && run_llrintf() && run_norm3df() && run_norm4df() && run_normf() && run_rnorm3df() && run_rnorm4df() && run_rnormf() && run_lroundf() && run_llroundf() && diff --git a/hipamd/tests/src/deviceLib/hipTestDeviceDouble.cpp b/hipamd/tests/src/deviceLib/hipTestDeviceDouble.cpp index 5bdbbf1b8f..3b919d0cab 100644 --- a/hipamd/tests/src/deviceLib/hipTestDeviceDouble.cpp +++ b/hipamd/tests/src/deviceLib/hipTestDeviceDouble.cpp @@ -128,7 +128,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -163,7 +170,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -193,7 +207,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -221,7 +240,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -249,7 +273,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -278,7 +307,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -306,7 +340,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -343,7 +382,16 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); + if(passed == 1){ return true; } @@ -383,7 +431,18 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +delete [] E; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); +hipFree(Ed); + if(passed == 1){ return true; } @@ -416,7 +475,14 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); + if(passed == 1){ return true; } @@ -452,7 +518,16 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); + if(passed == 1){ return true; } @@ -492,7 +567,18 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +delete [] C; +delete [] D; +delete [] E; +hipFree(Ad); +hipFree(Bd); +hipFree(Cd); +hipFree(Dd); +hipFree(Ed); + if(passed == 1){ return true; } @@ -522,7 +608,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } @@ -549,7 +640,12 @@ for(int i=0;i<512;i++){ passed = 1; } } -free(A); + +delete [] A; +delete [] B; +hipFree(Ad); +hipFree(Bd); + if(passed == 1){ return true; } diff --git a/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp b/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp index 79a272aaf2..d1bbed63cd 100644 --- a/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp +++ b/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp @@ -36,17 +36,23 @@ __global__ void Kern(hipLaunchParm lp, float *A) int main() { - float *A, *Ad; + float A[len]; + float *Ad; + for(int i=0;i Date: Thu, 2 Nov 2017 12:29:09 +0530 Subject: [PATCH 24/52] Remove libsupc++ dependency from HIP Change-Id: Ib843f99dfb3057b7c28029250c2449732ea7f17f --- hipamd/bin/hipcc | 1 - hipamd/packaging/hip_hcc.txt | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index bd01177a77..bbe079db41 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -138,7 +138,6 @@ if ($HIP_PLATFORM eq "hcc") { $HIPCXXFLAGS .= " -I$HSA_PATH/include"; $HIPCXXFLAGS .= " -Wno-deprecated-register"; - $HIPLDFLAGS .= " -lsupc++"; $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am -lhsakmt "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; diff --git a/hipamd/packaging/hip_hcc.txt b/hipamd/packaging/hip_hcc.txt index 47ad89450c..7c1736eee5 100644 --- a/hipamd/packaging/hip_hcc.txt +++ b/hipamd/packaging/hip_hcc.txt @@ -42,9 +42,9 @@ set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") if(@COMPILE_HIP_ATP_MARKER@) - set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler, libstdc++-static") + set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler") else() - set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, libstdc++-static") + set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@") endif() set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") set(CPACK_SOURCE_GENERATOR "TGZ") From d12cf0da7da537bb772448027cc48e51dea19338 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 3 Nov 2017 01:44:48 +0000 Subject: [PATCH 25/52] This introduces correct support for agent global variables, and implements hipModuleGetGlobal as an actual equivalent for cuModuleGetGlobal. --- hipamd/src/hip_module.cpp | 156 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 5 deletions(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 7799ad86c8..38411f2347 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -27,6 +27,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -613,6 +614,125 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); } +namespace +{ + struct Agent_global { + std::string name; + hipDeviceptr_t address; + std::uint32_t byte_cnt; + }; + + inline + void* address(hsa_executable_symbol_t x) + { + void* r = nullptr; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r); + + return r; + } + + inline + std::string name(hsa_executable_symbol_t x) + { + uint32_t sz = 0u; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); + + std::string r(sz, '\0'); + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); + + return r; + } + + inline + std::uint32_t size(hsa_executable_symbol_t x) + { + std::uint32_t r = 0; + hsa_executable_symbol_get_info( + x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r); + + return r; + } + + inline + void track(const Agent_global& x) + { + tprintf( + DB_MEM, + " add variable '%s' with ptr=%p size=%u to tracker\n", + x.name.c_str(), + x.address, + x.byte_cnt); + + auto device = ihipGetTlsDefaultCtx()->getWriteableDevice(); + + hc::AmPointerInfo ptr_info( + nullptr, + x.address, + x.address, + x.byte_cnt, + device->_acc, + true, + false); + hc::am_memtracker_add(x.address, ptr_info); + hc::am_memtracker_update(x.address, device->_deviceId, 0u); + } + + template> + inline + hsa_status_t copy_agent_global_variables( + hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out) + { + assert(out); + + hsa_symbol_kind_t t = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t); + + if (t == HSA_SYMBOL_KIND_VARIABLE) { + static_cast(out)->push_back( + Agent_global{name(x), address(x), size(x)}); + + track(static_cast(out)->back()); + } + + return HSA_STATUS_SUCCESS; + } + + inline + hsa_agent_t this_agent() + { + auto ctx = ihipGetTlsDefaultCtx(); + + if (!ctx) throw std::runtime_error{"No active HIP context."}; + + auto device = ctx->getDevice(); + + if (!device) throw std::runtime_error{"No device available for HIP."}; + + ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId); + + if (!currentDevice) { + throw std::runtime_error{"No active device for HIP"}; + } + + return currentDevice->_hsaAgent; + } + + inline + std::vector read_agent_globals(hipModule_t hmodule) + { + std::vector r; + + + hsa_executable_iterate_agent_symbols( + hmodule->executable, this_agent(), copy_agent_global_variables, &r); + + return r; + } +} + hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char* name) { @@ -625,11 +745,37 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, return ihipLogStatus(hipErrorNotInitialized); } else{ - hipFunction_t func; - ret = ihipModuleGetSymbol(&func, hmod, name); - *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func->_object); - return ihipLogStatus(ret); + static std::unordered_map< + hipModule_t, std::vector> agent_globals; + + // TODO: this is not particularly robust. + if (agent_globals.count(hmod) == 0) { + static std::mutex mtx; + std::lock_guard lck{mtx}; + + if (agent_globals.count(hmod) == 0) { + agent_globals.emplace(hmod, read_agent_globals(hmod)); + } + } + + // TODO: This is unsafe iff some other emplacement triggers rehashing. + // It will have to be properly fleshed out in the future. + const auto it0 = agent_globals.find(hmod); + if (it0 == agent_globals.cend()) { + throw std::runtime_error{"agent_globals data structure corrupted."}; + } + + const auto it1 = std::find_if( + it0->second.cbegin(), + it0->second.cend(), + [=](const Agent_global& x) { return x.name == name; }); + + if (it1 == it0->second.cend()) return ihipLogStatus(hipErrorNotFound); + + *dptr = it1->address; + *bytes = it1->byte_cnt; + + return ihipLogStatus(hipSuccess); } } From 19946ca43d06d869f05333b122b40e64a43d8e5c Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Fri, 3 Nov 2017 10:39:47 +0530 Subject: [PATCH 26/52] [newTests]Modified the test as per review comments --- hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp index 94de10ff4c..0a3dce284e 100644 --- a/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp +++ b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp @@ -30,15 +30,17 @@ THE SOFTWARE. #include "test_common.h" +const int len 256; + int main() { int numDevices = 0; - char name[256]; + char name[len]; hipDevice_t device; HIPCHECK(hipGetDeviceCount(&numDevices)); for(int i=0;i Date: Fri, 3 Nov 2017 14:52:19 +0530 Subject: [PATCH 27/52] [newTests]modified the test --- hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp index 0a3dce284e..ba260ac2e5 100644 --- a/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp +++ b/hipamd/tests/src/runtimeApi/device/hipDeviceGetName.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. #include "test_common.h" -const int len 256; +#define len 256 int main() { From 1c65bb1684d79df52deea0111c24f5c2c39bc246 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 5 Nov 2017 10:32:24 -0600 Subject: [PATCH 28/52] Set event state AFTER it is recorded. --- hipamd/src/hip_event.cpp | 5 ++--- hipamd/src/hip_stream.cpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index d1ee37a45e..91c3f9cb55 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -45,10 +45,10 @@ ihipEvent_t::ihipEvent_t(unsigned flags) void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType) { - _state = hipEventStatusRecording; _marker = *cf; _type = eventType; _stream = stream; + _state = hipEventStatusRecording; } @@ -157,13 +157,12 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) event->_state = hipEventStatusComplete; return ihipLogStatus(hipSuccess); } else { - event->_state = hipEventStatusRecording; // Clear timestamps event->_timestamp = 0; // Record the event in the stream: stream->locked_recordEvent(event); - + event->_state = hipEventStatusRecording; return ihipLogStatus(hipSuccess); } } else { diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index f62be31b9e..0cb5e732ab 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -103,7 +103,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { stream = ihipSyncAndResolveStream(stream); - // This will user create_blocking_marker to wait on the specified queue. + // This will use create_blocking_marker to wait on the specified queue. stream->locked_streamWaitEvent(event); } From 7427abba04f5e5158071db0ca9a57c575999df4e Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Mon, 6 Nov 2017 11:31:52 +0530 Subject: [PATCH 29/52] More tests for memory,stream & error APIs --- .../runtimeApi/error/hipPeekAtLastError.cpp | 38 +++++++++ .../runtimeApi/memory/hipMemcpyDtoDtoH.cpp | 77 ++++++++++++++++++ .../memory/hipMemcpyDtoDtoHAsync.cpp | 81 +++++++++++++++++++ .../src/runtimeApi/memory/hipMemcpyPeer.cpp | 77 ++++++++++++++++++ .../runtimeApi/memory/hipMemcpyPeerAsync.cpp | 79 ++++++++++++++++++ .../runtimeApi/stream/hipStreamGetFlags.cpp | 44 ++++++++++ 6 files changed, 396 insertions(+) create mode 100644 hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp create mode 100644 hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp create mode 100644 hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp create mode 100644 hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp create mode 100644 hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp create mode 100644 hipamd/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp diff --git a/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp b/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp new file mode 100644 index 0000000000..f06ee3b271 --- /dev/null +++ b/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp @@ -0,0 +1,38 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipGetDevice(int *device); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + hipSetDevice(-1); + if( hipPeekAtLastError() != hipSuccess) + passed(); +} diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp new file mode 100644 index 0000000000..568ecb9c35 --- /dev/null +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + hipDevice_t device; + size_t Nbytes = N*sizeof(int); + int numDevices = 0; + int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; + int *A_h, *B_h, *C_h ; + + HIPCHECK(hipGetDeviceCount(&numDevices)); + if(numDevices > 1) + { + HIPCHECK(hipSetDevice(0)); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMalloc(&X_d,Nbytes)); + HIPCHECK(hipMalloc(&Y_d,Nbytes)); + HIPCHECK(hipMalloc(&Z_d,Nbytes)); + + + HIPCHECK(hipSetDevice(0)); + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMemcpyDtoD(X_d, A_d, Nbytes)); + HIPCHECK(hipMemcpyDtoD(Y_d, B_d, Nbytes)); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N); + HIPCHECK(hipMemcpyDtoH(C_h, Z_d, Nbytes)); + HIPCHECK(hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + } + + passed(); + +} + diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp new file mode 100644 index 0000000000..036ec16529 --- /dev/null +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp @@ -0,0 +1,81 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + hipDevice_t device; + size_t Nbytes = N*sizeof(int); + int numDevices = 0; + int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; + int *A_h, *B_h, *C_h ; + hipStream_t s; + + HIPCHECK(hipGetDeviceCount(&numDevices)); + if(numDevices > 1) + { + HIPCHECK(hipSetDevice(0)); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMalloc(&X_d,Nbytes)); + HIPCHECK(hipMalloc(&Y_d,Nbytes)); + HIPCHECK(hipMalloc(&Z_d,Nbytes)); + + + HIPCHECK(hipSetDevice(0)); + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HIPCHECK(hipStreamCreate(&s)); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMemcpyDtoDAsync(X_d, A_d, Nbytes, s)); + HIPCHECK(hipMemcpyDtoDAsync(Y_d, B_d, Nbytes, s)); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N); + HIPCHECK(hipMemcpyDtoHAsync(C_h, Z_d, Nbytes, s)); + HIPCHECK(hipStreamSynchronize(s)); + HIPCHECK(hipDeviceSynchronize()); + + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + } + + passed(); + + +} + diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp new file mode 100644 index 0000000000..dc2ba78ae0 --- /dev/null +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + hipDevice_t device; + size_t Nbytes = N*sizeof(int); + int numDevices = 0; + int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; + int *A_h, *B_h, *C_h ; + + HIPCHECK(hipGetDeviceCount(&numDevices)); + if(numDevices > 1) + { + HIPCHECK(hipSetDevice(0)); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMalloc(&X_d,Nbytes)); + HIPCHECK(hipMalloc(&Y_d,Nbytes)); + HIPCHECK(hipMalloc(&Z_d,Nbytes)); + + + HIPCHECK(hipSetDevice(0)); + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + + HIPCHECK(hipSetDevice(1)); + hipMemcpyPeer(X_d, 1, A_d, 0, Nbytes); //this call is eqv to hipMemcpy(hipMemcpyD2D) which goes via stg bufs. + hipMemcpyPeer(Y_d, 1, B_d, 0, Nbytes); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N); + HIPCHECK ( hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + } + passed(); + + + + +} + diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp new file mode 100644 index 0000000000..1f0108c7cb --- /dev/null +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp @@ -0,0 +1,79 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Conformance test for checking functionality of + * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + +int main() +{ + hipDevice_t device; + size_t Nbytes = N*sizeof(int); + int numDevices = 0; + int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; + int *A_h, *B_h, *C_h ; + hipStream_t s; + + + HIPCHECK(hipGetDeviceCount(&numDevices)); + if(numDevices > 1) + { + HIPCHECK(hipSetDevice(0)); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMalloc(&X_d,Nbytes)); + HIPCHECK(hipMalloc(&Y_d,Nbytes)); + HIPCHECK(hipMalloc(&Z_d,Nbytes)); + + + HIPCHECK(hipSetDevice(0)); + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HIPCHECK(hipStreamCreate(&s)); + HIPCHECK(hipSetDevice(1)); + HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s)); + HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s)); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N); + HIPCHECK ( hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK (hipDeviceSynchronize()); + HIPCHECK (hipStreamSynchronize(s)); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + } + + passed(); + + +} + diff --git a/hipamd/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp b/hipamd/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp new file mode 100644 index 0000000000..9212c70e7f --- /dev/null +++ b/hipamd/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp @@ -0,0 +1,44 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "test_common.h" + + +int main(int argc, char *argv[]) +{ + hipStream_t stream; + unsigned int flags; + HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault)); + HIPCHECK(hipStreamGetFlags(stream, &flags)); + HIPASSERT(flags == 0); + HIPCHECK(hipStreamDestroy(stream)); + + HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + HIPCHECK(hipStreamGetFlags(stream, &flags)); + HIPASSERT(flags == 1); + HIPCHECK(hipStreamDestroy(stream)); + + passed(); +} From e18f4239b69bf1930f8d0ae55f0644846152b403 Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Mon, 6 Nov 2017 15:13:36 +0530 Subject: [PATCH 30/52] Modified tests as per review --- ...hipMemcpyDtoDtoH.cpp => hipMemcpyDtoD.cpp} | 11 ++++++---- ...toDtoHAsync.cpp => hipMemcpyDtoDAsync.cpp} | 13 +++++++----- .../src/runtimeApi/memory/hipMemcpyPeer.cpp | 21 +++++++++++-------- .../runtimeApi/memory/hipMemcpyPeerAsync.cpp | 6 ++++++ 4 files changed, 33 insertions(+), 18 deletions(-) rename hipamd/tests/src/runtimeApi/memory/{hipMemcpyDtoDtoH.cpp => hipMemcpyDtoD.cpp} (88%) rename hipamd/tests/src/runtimeApi/memory/{hipMemcpyDtoDtoHAsync.cpp => hipMemcpyDtoDAsync.cpp} (88%) diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp similarity index 88% rename from hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp rename to hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp index 568ecb9c35..ccb02b74ce 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoH.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp @@ -51,11 +51,11 @@ int main() HIPCHECK(hipSetDevice(0)); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); - HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK (hipDeviceSynchronize()); + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); @@ -69,6 +69,9 @@ int main() HipTest::checkVectorADD(A_h, B_h, C_h, N); HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK(hipFree(X_d)); + HIPCHECK(hipFree(Y_d)); + HIPCHECK(hipFree(Z_d)); } passed(); diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp similarity index 88% rename from hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp rename to hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp index 036ec16529..5c99b43564 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDtoHAsync.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp @@ -52,11 +52,11 @@ int main() HIPCHECK(hipSetDevice(0)); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); - HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK (hipDeviceSynchronize()); + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipStreamCreate(&s)); @@ -70,8 +70,11 @@ int main() HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); - + HIPCHECK(hipStreamDestroy(s)); HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK(hipFree(X_d)); + HIPCHECK(hipFree(Y_d)); + HIPCHECK(hipFree(Z_d)); } passed(); diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp index dc2ba78ae0..7e2fc2d3d0 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp @@ -48,25 +48,28 @@ int main() HIPCHECK(hipMalloc(&X_d,Nbytes)); HIPCHECK(hipMalloc(&Y_d,Nbytes)); HIPCHECK(hipMalloc(&Z_d,Nbytes)); - HIPCHECK(hipSetDevice(0)); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N); - HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK (hipDeviceSynchronize()); + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); - - + HIPCHECK(hipSetDevice(1)); hipMemcpyPeer(X_d, 1, A_d, 0, Nbytes); //this call is eqv to hipMemcpy(hipMemcpyD2D) which goes via stg bufs. hipMemcpyPeer(Y_d, 1, B_d, 0, Nbytes); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N); - HIPCHECK ( hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK (hipDeviceSynchronize()); + HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); + HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK(hipFree(X_d)); + HIPCHECK(hipFree(Y_d)); + HIPCHECK(hipFree(Z_d)); } passed(); diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp index 1f0108c7cb..9d46ccb0d8 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp @@ -70,6 +70,12 @@ int main() HIPCHECK (hipDeviceSynchronize()); HIPCHECK (hipStreamSynchronize(s)); HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HIPCHECK(hipStreamDestroy(s)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK(hipFree(X_d)); + HIPCHECK(hipFree(Y_d)); + HIPCHECK(hipFree(Z_d)); } passed(); From f92648e4473e5fe92d5a46760ae532cb238cfa7e Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 6 Nov 2017 16:53:57 +0530 Subject: [PATCH 31/52] [hit] Add support for cmake commands as part of test infra Cmake supports several builtin command-line tools. These tools can now be specified as part of the HIT block. These commands will be run during cmake configure step. Change-Id: I32466c94b2fe1ecdc30249755fc027102295617d --- hipamd/tests/README.md | 12 ++++++++++++ hipamd/tests/hit/HIT.cmake | 14 ++++++++++++++ hipamd/tests/hit/parser | 27 +++++++++++++++++++++------ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/hipamd/tests/README.md b/hipamd/tests/README.md index 27cde7c534..a9638ba95f 100644 --- a/hipamd/tests/README.md +++ b/hipamd/tests/README.md @@ -75,6 +75,18 @@ RUN: %t CMAKE_TEST_NAME EXCLUDE_HIP_PLATFORM +``` +cmake_command: refers to any of the commands supported by ```cmake -E``` as specified in the [cmake documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html#command-line-tool-mode). Note that the commands are limited by the version of cmake the user is running. +options_to_cmake_command: refers to the arguments supported by the specific cmake_command. The arguments are parsed by HIT to replace special markers. The markers supported by HIT are: +%S: Refers to the source directory containing the current source file. +%B: Refers to the build directory for the current cmake project i.e. CMAKE_CURRENT_BINARY_DIR. + + ### Running tests: ``` ctest diff --git a/hipamd/tests/hit/HIT.cmake b/hipamd/tests/hit/HIT.cmake index fd0001214e..82e8508dcd 100644 --- a/hipamd/tests/hit/HIT.cmake +++ b/hipamd/tests/hit/HIT.cmake @@ -155,6 +155,20 @@ macro(HIT_ADD_FILES _dir _label _parent) endif() endforeach() + # Run cmake commands + execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --cmakeCMDs ${file} + OUTPUT_VARIABLE _contents + ERROR_QUIET + WORKING_DIRECTORY ${_dir} + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX REPLACE "\n" ";" _contents "${_contents}") + string(REGEX REPLACE "%S" ${_dir} _contents "${_contents}") + string(REGEX REPLACE "%B" ${CMAKE_CURRENT_BINARY_DIR} _contents "${_contents}") + foreach(_cmd ${_contents}) + string(REGEX REPLACE " " ";" _cmd "${_cmd}") + execute_process(COMMAND ${CMAKE_COMMAND} -E ${_cmd}) + endforeach() + # Add tests execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --runCMDs ${file} OUTPUT_VARIABLE _contents diff --git a/hipamd/tests/hit/parser b/hipamd/tests/hit/parser index 3d851752e4..f77bd524f3 100755 --- a/hipamd/tests/hit/parser +++ b/hipamd/tests/hit/parser @@ -8,7 +8,7 @@ use File::Spec; sub parse_file { my $file = shift; (my $exe = $file) =~ s/\.[^.]+$//g; - my (@buildCMDs, @runCMDs, @runNamedCMDs); + my (@buildCMDs, @runCMDs, @runNamedCMDs, @cmakeCMDs); if (open (SOURCE, '<:encoding(UTF-8)', "$file")) { while () { my $line=$_; @@ -36,10 +36,17 @@ sub parse_file { $line =~ s/\R//g; # Remove line endings push @runNamedCMDs, $line; } + # Look for CMAKECMD instructions + if ($line =~ /^ \* CMAKECMD:/) { + $line =~ s/^ \* CMAKECMD: //g; # Remove " * CMAKECMD: " + # Substitute %S -> srcdir and %B -> builddir happens in cmake + $line =~ s/\R//g; # Remove line endings + push @cmakeCMDs, $line; + } } close(SOURCE); } - return (\@buildCMDs, \@runCMDs, \@runNamedCMDs); + return (\@buildCMDs, \@runCMDs, \@runNamedCMDs, \@cmakeCMDs); } # Exit if no arguments specified @@ -53,8 +60,9 @@ my @options = (); my $retBuildCMDs = 0; my $retRunCMDs = 0; my $retRunNamedCMDs = 0; +my $retCmakeCMDs = 0; foreach $arg (@ARGV) { - if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs) { + if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs or $retCmakeCMDs) { push (@options, $arg); } if ($arg eq '--buildCMDs') { @@ -66,18 +74,21 @@ foreach $arg (@ARGV) { if ($arg eq '--runNamedCMDs') { $retRunNamedCMDs = 1; } + if ($arg eq '--cmakeCMDs') { + $retCmakeCMDs = 1; + } } # Atleast one command needs to be specified -if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0)) { - die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs> FILENAMEs\n"; +if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0) and ($retCmakeCMDs eq 0)) { + die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs|--cmakeCMDs> FILENAMEs\n"; } # Iterate over input files foreach $file (@options) { # Convert absolute path to path relative to working directory my $relfile = File::Spec->abs2rel($file); - my ($buildCMDs, $runCMDs, $runNamedCMDs) = parse_file("$relfile"); + my ($buildCMDs, $runCMDs, $runNamedCMDs, $cmakeCMDs) = parse_file("$relfile"); if ($retBuildCMDs) { # print "BuildCMDs:\n"; print "$_\n" for @$buildCMDs; @@ -90,6 +101,10 @@ foreach $file (@options) { # print "RunNamedCMDs:\n"; print "$_\n" for @$runNamedCMDs; } + if ($retCmakeCMDs) { + # print "CmakeCMDs:\n"; + print "$_\n" for @$cmakeCMDs; + } } # vim: ts=4:sw=4:expandtab:smartindent From 955cfbfdc76fbbff4c1be0525e915bcdb7e5410a Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 6 Nov 2017 23:49:25 +0000 Subject: [PATCH 32/52] Make hipEvent_t thread safe. Support re-recording of same event by different threads. - Add criticalData structure to hipEvent_t, similar to mechanism used for streams, contexts, device. Events are always locked after streams to avoid deadlock. - ihipEvent_t::locked_copyCrit can be used to copy critical state including marker. The critical state in the event can then be re-recorded. - refactor hipEventElapsedTime. Remmove stale debug code, native signal refs. --- hipamd/src/hip_event.cpp | 166 +++++++++++++++++----------------- hipamd/src/hip_hcc.cpp | 21 +++-- hipamd/src/hip_hcc_internal.h | 94 ++++++++++++++----- hipamd/src/hip_stream.cpp | 10 +- 4 files changed, 177 insertions(+), 114 deletions(-) diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index 58d764bc44..494fd3854d 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -31,12 +31,9 @@ THE SOFTWARE. ihipEvent_t::ihipEvent_t(unsigned flags) + : _criticalData(this) { - _state = hipEventStatusCreated; - _stream = NULL; _flags = flags; - _timestamp = 0; - _type = hipEventTypeIndependent; }; @@ -45,56 +42,45 @@ ihipEvent_t::ihipEvent_t(unsigned flags) void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType) { - _marker = *cf; - _type = eventType; - _stream = stream; - _state = hipEventStatusRecording; + LockedAccessor_EventCrit_t crit(_criticalData); + crit->_eventData.marker(*cf); + crit->_eventData._type = eventType; + crit->_eventData._stream = stream; + crit->_eventData._state = hipEventStatusRecording; } -void ihipEvent_t::refereshEventStatus() +std::pair +ihipEvent_t::refreshEventStatus() { - bool isReady0 = locked_isReady(); - bool isReady1; - int val = 0; - if (_state == hipEventStatusRecording) { - // TODO - use completion-future functions to obtain ticks and timestamps: - hsa_signal_t *sig = static_cast (_marker.get_native_handle()); - isReady1 = locked_isReady(); - if (sig) { - val = hsa_signal_load_acquire(*sig); - if (val == 0) { + auto ecd = locked_copyCrit(); + if (ecd._state == hipEventStatusRecording) { + bool isReady1 = ecd._stream->locked_eventIsReady(this); + if (isReady1) { + LockedAccessor_EventCrit_t eCrit(_criticalData); - if ((_type == hipEventTypeIndependent) || (_type == hipEventTypeStopCommand)) { - _timestamp = _marker.get_end_tick(); - } else if (_type == hipEventTypeStartCommand) { - _timestamp = _marker.get_begin_tick(); - } else { - assert(0); // TODO - move to debug assert - _timestamp = 0; - } - - _state = hipEventStatusComplete; + if ((eCrit->_eventData._type == hipEventTypeIndependent) || + (eCrit->_eventData._type == hipEventTypeStopCommand)) { + eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_end_tick(); + } else if (eCrit->_eventData._type == hipEventTypeStartCommand) { + eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_begin_tick(); + } else { + eCrit->_eventData._timestamp = 0; + assert(0); // TODO - move to debug assert } + + eCrit->_eventData._state = hipEventStatusComplete; + + return std::pair (eCrit->_eventData._state, eCrit->_eventData._timestamp); } - } + } - if (_state != hipEventStatusComplete) { - //printf (" not ready isReady0=%d val=%d isReady1=%d\n", isReady0, val, isReady1); - } + // Not complete path here: + return std::pair (ecd._state, ecd._timestamp); } -bool ihipEvent_t::locked_isReady() -{ - return _stream->locked_eventIsReady(this); -} - -void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode) -{ - return _stream->locked_eventWaitComplete(this, waitMode); -} hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) @@ -136,33 +122,44 @@ hipError_t hipEventCreate(hipEvent_t* event) return ihipLogStatus(ihipEventCreate(event, 0)); } + hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream); - if (event && event->_state != hipEventStatusUnitialized) { + auto ecd = event->locked_copyCrit(); + + if (event && ecd._state != hipEventStatusUnitialized) { stream = ihipSyncAndResolveStream(stream); - event->_stream = stream; - if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) { - // TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0 - + // // If default stream , then wait on all queues. ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); ctx->locked_syncDefaultStream(true, true); - event->_timestamp = hc::get_system_ticks(); - event->_state = hipEventStatusComplete; + { + LockedAccessor_EventCrit_t eCrit(event->criticalData()); + eCrit->_eventData.marker(hc::completion_future()); // reset event + eCrit->_eventData._stream = stream; + eCrit->_eventData._timestamp = hc::get_system_ticks(); + eCrit->_eventData._state = hipEventStatusComplete; + } return ihipLogStatus(hipSuccess); } else { - // Clear timestamps - event->_timestamp = 0; - // Record the event in the stream: - stream->locked_recordEvent(event); - event->_state = hipEventStatusRecording; + // Keep a copy outside the critical section so we lock stream first, then event - to avoid deadlock + hc::completion_future cf = stream->locked_recordEvent(event); + + { + LockedAccessor_EventCrit_t eCrit(event->criticalData()); + eCrit->_eventData.marker(cf); + eCrit->_eventData._stream = stream; + eCrit->_eventData._timestamp = 0; + eCrit->_eventData._state = hipEventStatusRecording; + } + return ihipLogStatus(hipSuccess); } } else { @@ -170,15 +167,13 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) } } + hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); if (event) { - event->_state = hipEventStatusUnitialized; - delete event; - event = NULL; return ihipLogStatus(hipSuccess); } else { @@ -193,20 +188,24 @@ hipError_t hipEventSynchronize(hipEvent_t event) if (!(event->_flags & hipEventReleaseToSystem)) { tprintf(DB_WARN, "hipEventSynchronize on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); } + auto ecd = event->locked_copyCrit(); if (event) { - if (event->_state == hipEventStatusUnitialized) { + if (ecd._state == hipEventStatusUnitialized) { return ihipLogStatus(hipErrorInvalidResourceHandle); - } else if (event->_state == hipEventStatusCreated ) { + } else if (ecd._state == hipEventStatusCreated ) { // Created but not actually recorded on any device: return ihipLogStatus(hipSuccess); - } else if (HIP_SYNC_NULL_STREAM && (event->_stream->isDefaultStream() )) { + } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream() )) { auto *ctx = ihipGetTlsDefaultCtx(); // TODO-HIP_SYNC_NULL_STREAM - can remove this code ctx->locked_syncDefaultStream(true, true); return ihipLogStatus(hipSuccess); } else { - event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); + ecd._stream->locked_eventWaitComplete( + ecd.marker(), + (event->_flags & hipEventBlockingSync) ? + hc::hcWaitModeBlocked : hc::hcWaitModeActive); return ihipLogStatus(hipSuccess); } @@ -222,13 +221,16 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) hipError_t status = hipSuccess; *ms = 0.0f; + + auto startEcd = start->locked_copyCrit(); + auto stopEcd = stop->locked_copyCrit(); if ((start == nullptr) || (start->_flags & hipEventDisableTiming) || - (start->_state == hipEventStatusUnitialized) || (start->_state == hipEventStatusCreated) || + (startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) || (stop == nullptr) || (stop->_flags & hipEventDisableTiming) || - ( stop->_state == hipEventStatusUnitialized) || ( stop->_state == hipEventStatusCreated)) { + (stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) { // Both events must be at least recorded else return hipErrorInvalidResourceHandle @@ -236,31 +238,31 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) } else { // Refresh status, if still recording... - start->refereshEventStatus(); - stop->refereshEventStatus(); - if ((start->_state == hipEventStatusComplete) && (stop->_state == hipEventStatusComplete)) { - // Common case, we have good information for both events. + auto startStatus = start->refreshEventStatus(); // pair < state, timestamp > + auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp > - int64_t tickDiff = (stop->timestamp() - start->timestamp()); + if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) { + // Common case, we have good information for both events. 'second" is the timestamp: + int64_t tickDiff = (stopStatus.second - startStatus.second); - uint64_t freqHz; - hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); - if (freqHz) { - *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; - status = hipSuccess; - } else { - * ms = 0.0f; - status = hipErrorInvalidValue; - } + uint64_t freqHz; + hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); + if (freqHz) { + *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; + status = hipSuccess; + } else { + * ms = 0.0f; + status = hipErrorInvalidValue; + } - } else if ((start->_state == hipEventStatusRecording) || - (stop->_state == hipEventStatusRecording)) { + } else if ((startStatus.first == hipEventStatusRecording) || + (stopStatus.first == hipEventStatusRecording)) { status = hipErrorNotReady; } else { - assert(0); + assert(0); } } @@ -275,7 +277,9 @@ hipError_t hipEventQuery(hipEvent_t event) tprintf(DB_WARN, "hipEventQuery on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n"); } - if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) { + auto ecd = event->locked_copyCrit(); + + if ((ecd._state == hipEventStatusRecording) && !ecd._stream->locked_eventIsReady(event)) { return ihipLogStatus(hipErrorNotReady); } else { return ihipLogStatus(hipSuccess); diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 7b59e96975..2d2d2745fe 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -339,12 +339,11 @@ void ihipStream_t::locked_wait() // Causes current stream to wait for specified event to complete: // Note this does not provide any kind of host serialization. -void ihipStream_t::locked_streamWaitEvent(hipEvent_t event) +void ihipStream_t::locked_streamWaitEvent(ihipEventData_t &ecd) { LockedAccessor_StreamCrit_t crit(_criticalData); - - crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope); + crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); } @@ -352,24 +351,28 @@ void ihipStream_t::locked_streamWaitEvent(hipEvent_t event) // Note this does not provide any kind of host serialization. bool ihipStream_t::locked_eventIsReady(hipEvent_t event) { + // Event query that returns "Complete" may cause HCC to manipulate // internal queue state so lock the stream's queue here. - LockedAccessor_StreamCrit_t crit(_criticalData); + LockedAccessor_StreamCrit_t scrit(_criticalData); - return (event->marker().is_ready()); + LockedAccessor_EventCrit_t ecrit(event->criticalData()); + + return (ecrit->_eventData.marker().is_ready()); } -void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode) +// Waiting on event can cause HCC to reclaim stream resources - so need to lock the stream. +void ihipStream_t::locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode) { LockedAccessor_StreamCrit_t crit(_criticalData); - event->marker().wait(waitMode); + marker.wait(waitMode); } // Create a marker in this stream. // Save state in the event so it can track the status of the event. -void ihipStream_t::locked_recordEvent(hipEvent_t event) +hc::completion_future ihipStream_t::locked_recordEvent(hipEvent_t event) { // Lock the stream to prevent simultaneous access LockedAccessor_StreamCrit_t crit(_criticalData); @@ -385,7 +388,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event) scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope; } - event->marker(crit->_av.create_marker(scopeFlag)); + return crit->_av.create_marker(scopeFlag); }; //============================================================================= diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 32fc4686fa..6b51f5c202 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -137,6 +137,7 @@ extern std::vector g_dbStopTriggers; class ihipStream_t; class ihipDevice_t; class ihipCtx_t; +struct ihipEventData_t; // Color defs for debug messages: #define KNRM "\x1B[0m" @@ -152,10 +153,12 @@ extern const char *API_COLOR; extern const char *API_COLOR_END; -// If set, thread-safety is enforced on all stream functions. -// Stream functions will acquire a mutex before entering critical sections. -#define STREAM_THREAD_SAFE 1 +// If set, thread-safety is enforced on all event/stream/ctx/device functions. +// Can disable for performance or functional experiments - in this case +// the code uses a dummy "no-op" mutex. +#define EVENT_THREAD_SAFE 1 +#define STREAM_THREAD_SAFE 1 #define CTX_THREAD_SAFE 1 @@ -390,6 +393,12 @@ class FakeMutex void unlock() { } }; +#if EVENT_THREAD_SAFE +typedef std::mutex EventMutex; +#else +#warning "Stream thread-safe disabled" +typedef FakeMutex EventMutex; +#endif #if STREAM_THREAD_SAFE typedef std::mutex StreamMutex; @@ -540,11 +549,11 @@ public: hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); }; - void locked_streamWaitEvent(hipEvent_t event); - void locked_recordEvent(hipEvent_t event); + void locked_streamWaitEvent(ihipEventData_t & event); + hc::completion_future locked_recordEvent(hipEvent_t event); bool locked_eventIsReady(hipEvent_t event); - void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode); + void locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode); ihipStreamCritical_t &criticalData() { return _criticalData; }; @@ -628,32 +637,76 @@ enum ihipEventType_t { hipEventTypeStopCommand, }; + +struct ihipEventData_t +{ + ihipEventData_t() { + _state = hipEventStatusCreated; + _stream = NULL; + _timestamp = 0; + _type = hipEventTypeIndependent; + }; + + void marker(const hc::completion_future & marker) { _marker = marker; }; + hc::completion_future & marker() { return _marker; } + uint64_t timestamp() const { return _timestamp; } ; + ihipEventType_t type() const { return _type; }; + + ihipEventType_t _type; + hipEventStatus_t _state; + hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded + uint64_t _timestamp; // store timestamp, may be set on host or by marker. +private: + hc::completion_future _marker; +}; + + +//============================================================================= +//class ihipEventCriticalBase_t +template +class ihipEventCriticalBase_t : LockedBase +{ +public: + ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) : + _parent(parentEvent) + {} + ~ihipEventCriticalBase_t() {}; + + // Keep data in structure so it can be easily copied into snapshots + // (used to reduce lock contention and preserve correct lock order) + ihipEventData_t _eventData; + +private: + const ihipEvent_t *_parent; + friend class LockedAccessor; +}; + +typedef ihipEventCriticalBase_t ihipEventCritical_t; + +typedef LockedAccessor LockedAccessor_EventCrit_t; + // internal hip event structure. class ihipEvent_t { public: ihipEvent_t(unsigned flags); void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType); - void refereshEventStatus(); - hc::completion_future & marker() { return _marker; } - void marker(hc::completion_future cf) { _marker = cf; }; + std::pair refreshEventStatus(); // returns pair - bool locked_isReady(); - void locked_waitComplete(hc::hcWaitMode waitMode); - uint64_t timestamp() const { return _timestamp; } ; - ihipEventType_t type() const { return _type; }; + // Return a copy of the critical state. The critical data is locked during the copy. + ihipEventData_t locked_copyCrit() { + LockedAccessor_EventCrit_t crit(_criticalData); + return _criticalData._eventData; + }; + + ihipEventCritical_t &criticalData() { return _criticalData; }; public: - hipEventStatus_t _state; - - hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded unsigned _flags; - private: - hc::completion_future _marker; - ihipEventType_t _type; - uint64_t _timestamp; // store timestamp, may be set on host or by marker. + ihipEventCritical_t _criticalData; + friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); } ; @@ -671,7 +724,6 @@ public: }; ~ihipDeviceCriticalBase_t() { - } // Contexts: diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 0cb5e732ab..23b73e6ba6 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -93,18 +93,22 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int hipError_t e = hipSuccess; + auto ecd = event->locked_copyCrit(); + if (event == nullptr) { e = hipErrorInvalidResourceHandle; - } else if (event->_state != hipEventStatusUnitialized) { + } else if (ecd._state != hipEventStatusUnitialized) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { // conservative wait on host for the specified event to complete: - event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); + // return _stream->locked_eventWaitComplete(this, waitMode); + // + ecd._stream->locked_eventWaitComplete(ecd.marker(), (event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { stream = ihipSyncAndResolveStream(stream); // This will use create_blocking_marker to wait on the specified queue. - stream->locked_streamWaitEvent(event); + stream->locked_streamWaitEvent(ecd); } } // else event not recorded, return immediately and don't create marker. From b5911acce2d5794ec09e13b3f409d868538bddab Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 6 Nov 2017 17:10:07 -0600 Subject: [PATCH 33/52] hipStreamWaitEvent returns success if event created but not recorded --- hipamd/src/hip_stream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 23b73e6ba6..dab31dad62 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -98,7 +98,8 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int if (event == nullptr) { e = hipErrorInvalidResourceHandle; - } else if (ecd._state != hipEventStatusUnitialized) { + } else if ((ecd._state != hipEventStatusUnitialized) && + (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { // conservative wait on host for the specified event to complete: From e71456cc6c37c3bdaa222fd4c1dd8c8000399501 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 6 Nov 2017 23:42:35 +0000 Subject: [PATCH 34/52] Check for null event in hipEventElapsedTime --- hipamd/src/hip_event.cpp | 63 +++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index 494fd3854d..3664e88d2b 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -221,48 +221,51 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) hipError_t status = hipSuccess; *ms = 0.0f; - - auto startEcd = start->locked_copyCrit(); - auto stopEcd = stop->locked_copyCrit(); - - if ((start == nullptr) || - (start->_flags & hipEventDisableTiming) || - (startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) || - (stop == nullptr) || - (stop->_flags & hipEventDisableTiming) || - (stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) { - - // Both events must be at least recorded else return hipErrorInvalidResourceHandle + if ((start == nullptr) || (stop == nullptr)) { status = hipErrorInvalidResourceHandle; - } else { - // Refresh status, if still recording... + + auto startEcd = start->locked_copyCrit(); + auto stopEcd = stop->locked_copyCrit(); - auto startStatus = start->refreshEventStatus(); // pair < state, timestamp > - auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp > + if ((start->_flags & hipEventDisableTiming) || + (startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) || + (stop->_flags & hipEventDisableTiming) || + (stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) { - if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) { - // Common case, we have good information for both events. 'second" is the timestamp: - int64_t tickDiff = (stopStatus.second - startStatus.second); + // Both events must be at least recorded else return hipErrorInvalidResourceHandle - uint64_t freqHz; - hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); - if (freqHz) { - *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; - status = hipSuccess; - } else { + status = hipErrorInvalidResourceHandle; + + } else { + // Refresh status, if still recording... + + auto startStatus = start->refreshEventStatus(); // pair < state, timestamp > + auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp > + + if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) { + // Common case, we have good information for both events. 'second" is the timestamp: + int64_t tickDiff = (stopStatus.second - startStatus.second); + + uint64_t freqHz; + hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz); + if (freqHz) { + *ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f; + status = hipSuccess; + } else { * ms = 0.0f; status = hipErrorInvalidValue; } - } else if ((startStatus.first == hipEventStatusRecording) || - (stopStatus.first == hipEventStatusRecording)) { + } else if ((startStatus.first == hipEventStatusRecording) || + (stopStatus.first == hipEventStatusRecording)) { - status = hipErrorNotReady; - } else { - assert(0); + status = hipErrorNotReady; + } else { + assert(0); + } } } From 2a1d7db3713eeb2b51392c641644c1df0c238a52 Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Tue, 7 Nov 2017 09:51:32 +0530 Subject: [PATCH 35/52] identation change in hipPeekAtLastError --- hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp b/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp index f06ee3b271..5ebe181e22 100644 --- a/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp +++ b/hipamd/tests/src/runtimeApi/error/hipPeekAtLastError.cpp @@ -33,6 +33,6 @@ THE SOFTWARE. int main() { hipSetDevice(-1); - if( hipPeekAtLastError() != hipSuccess) - passed(); + if(hipPeekAtLastError() != hipSuccess) + passed(); } From 0ad1308efd4a9812f811ff8132c16733fd710607 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 8 Nov 2017 00:08:55 +0000 Subject: [PATCH 36/52] Clean up trailing whitespace so as to reduce noise in #246. --- hipamd/include/hip/hcc_detail/hip_runtime.h | 4 +- hipamd/src/hip_memory.cpp | 18 ++--- .../src/runtimeApi/event/record_event.cpp | 20 +++--- .../tests/src/runtimeApi/memory/hipMemcpy.cpp | 66 +++++++++---------- .../multiThread/hipMultiThreadStreams1.cpp | 4 +- .../src/runtimeApi/stream/hipNullStream.cpp | 10 +-- .../src/runtimeApi/stream/hipStreamSync2.cpp | 28 ++++---- .../runtimeApi/stream/hipStreamWaitEvent.cpp | 24 +++---- 8 files changed, 87 insertions(+), 87 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index 370ac2abbb..d3211ed3f5 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -53,7 +53,7 @@ THE SOFTWARE. // define HIP_ENABLE_PRINTF to enable printf #ifdef HIP_ENABLE_PRINTF #define HCC_ENABLE_ACCELERATOR_PRINTF 1 -#endif +#endif //--- // Remainder of this file only compiles with HCC @@ -481,7 +481,7 @@ do {\ type* var = \ (type*)__get_dynamicgroupbaseptr(); \ -#define HIP_DYNAMIC_SHARED_ATTRIBUTE +#define HIP_DYNAMIC_SHARED_ATTRIBUTE diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 5a4b5f4b4e..8a5225d499 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -69,7 +69,7 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags) if (shareWithAll) { hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr); - tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt); + tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt); if (s != HSA_STATUS_SUCCESS) { ret = -1; } @@ -126,7 +126,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool if (HIP_INIT_ALLOC != -1) { // TODO , dont' call HIP API directly here: hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes); - } + } if (ptr != nullptr) { int r = sharePtr(ptr, ctx, shareWithAll, hipFlags); @@ -255,7 +255,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hip_status = hipErrorMemoryAllocation; } - } + } return ihipLogStatus(hip_status); @@ -288,10 +288,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } - const unsigned supportedFlags = hipHostMallocPortable - | hipHostMallocMapped - | hipHostMallocWriteCombined - | hipHostMallocCoherent + const unsigned supportedFlags = hipHostMallocPortable + | hipHostMallocMapped + | hipHostMallocWriteCombined + | hipHostMallocCoherent | hipHostMallocNonCoherent; @@ -304,7 +304,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hip_status = hipErrorInvalidValue; } else { auto device = ctx->getWriteableDevice(); - + unsigned amFlags = 0; if (flags & hipHostMallocCoherent) { amFlags = amHostCoherent; @@ -585,7 +585,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array, hsa_ext_image_data_info_t imageInfo; hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment; - + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment); if (size && (*ptr == NULL)) { diff --git a/hipamd/tests/src/runtimeApi/event/record_event.cpp b/hipamd/tests/src/runtimeApi/event/record_event.cpp index bd8a3ada8e..b9653bf522 100644 --- a/hipamd/tests/src/runtimeApi/event/record_event.cpp +++ b/hipamd/tests/src/runtimeApi/event/record_event.cpp @@ -52,7 +52,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ if (!(testMask & p_tests)) { return; } - printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n", + printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n", testMask, stream, waitStart, syncModeString(syncMode)); size_t sizeBytes = numElements * sizeof(int); @@ -85,8 +85,8 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ HIPCHECK(hipEventSynchronize(start)); } - - hipError_t expectedStopError = hipSuccess; + + hipError_t expectedStopError = hipSuccess; // How to wait for the events to finish: switch (syncMode) { @@ -97,12 +97,12 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish... break; case syncStopEvent: - HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventSynchronize(stop)); break; default: assert(0); }; - + float t; @@ -111,25 +111,25 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_ failed ("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e)); } - if (e == hipSuccess) + if (e == hipSuccess) assert (t==0.0f); - + // stop usually ready unless we skipped the synchronization (syncNone) HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError); - if (e == hipSuccess) + if (e == hipSuccess) assert (t==0.0f); e = hipEventElapsedTime(&t, start, stop); HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) + if (expectedStopError == hipSuccess) assert (t>0.0f); printf ("time=%6.2f error=%s\n", t, hipGetErrorName(e)); e = hipEventElapsedTime(&t, stop, start); HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) + if (expectedStopError == hipSuccess) assert (t<0.0f); printf ("negtime=%6.2f error=%s\n", t, hipGetErrorName(e)); diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp index e8e803e44c..d8438fa848 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp @@ -58,7 +58,7 @@ public: void offset(int offset) { _offset = offset; }; int offset() const { return _offset; }; - + private: T * _A_d; T* _B_d; @@ -72,7 +72,7 @@ private: template DeviceMemory::DeviceMemory(size_t numElements) - : _maxNumElements(numElements), + : _maxNumElements(numElements), _offset(0) { T ** np = nullptr; @@ -93,7 +93,7 @@ DeviceMemory::~DeviceMemory () HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0); HIPCHECK (hipFree(_C_dd)); - + _C_dd = NULL; }; @@ -125,7 +125,7 @@ public: T * A_hh; T* B_hh; - bool _usePinnedHost; + bool _usePinnedHost; private: size_t _maxNumElements; @@ -165,11 +165,11 @@ HostMemory::HostMemory(size_t numElements, bool usePinnedHost) template void -HostMemory::reset(size_t numElements, bool full) +HostMemory::reset(size_t numElements, bool full) { // Initialize the host data: for (size_t i=0; i void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) { size_t sizeElements = numElements * sizeof(T); - printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n", - __func__, + printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n", + __func__, TYPENAME(T), sizeElements, sizeElements/1024.0/1024.0, hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault, @@ -273,8 +273,8 @@ void memcpytest2_for_type(size_t numElements) { printSep(); - DeviceMemory memD(numElements); - HostMemory memU(numElements, 0/*usePinnedHost*/); + DeviceMemory memD(numElements); + HostMemory memU(numElements, 0/*usePinnedHost*/); HostMemory memP(numElements, 1/*usePinnedHost*/); for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) { @@ -307,11 +307,11 @@ void memcpytest2_sizes(size_t maxElem=0) maxElem = free/sizeof(T)/8; } - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); HIPCHECK ( hipDeviceReset() ); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 1/*usePinnedHost*/); for (size_t elem=1; elem<=maxElem; elem*=2) { @@ -336,11 +336,11 @@ void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) HIPCHECK(hipMemGetInfo(&free, &total)); - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); HIPCHECK ( hipDeviceReset() ); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 1/*usePinnedHost*/); size_t elem = maxElem / 2; @@ -380,16 +380,16 @@ void multiThread_1(bool serialize, bool usePinnedHost) { printSep(); printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); - DeviceMemory memD(N); - HostMemory mem1(N, usePinnedHost); - HostMemory mem2(N, usePinnedHost); + DeviceMemory memD(N); + HostMemory mem1(N, usePinnedHost); + HostMemory mem2(N, usePinnedHost); std::thread t1 (memcpytest2, &memD, &mem1, N, 0,0,0); if (serialize) { t1.join(); } - + std::thread t2 (memcpytest2,&memD, &mem2, N, 0,0,0); if (serialize) { t2.join(); @@ -427,21 +427,21 @@ int main(int argc, char *argv[]) // Some tests around the 64KB boundary which have historically shown issues: printf ("\n\n=== tests&0x2 (64KB boundary)\n"); size_t maxElem = 32*1024*1024; - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0/*usePinnedHost*/); - HostMemory memP(maxElem, 0/*usePinnedHost*/); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); + HostMemory memP(maxElem, 0/*usePinnedHost*/); // These all pass: - memcpytest2(&memD, &memP, 15*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 15*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0); // Just over 64MB: - memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 17*1024*1024+1024, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); + memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 17*1024*1024+1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); } @@ -464,7 +464,7 @@ int main(int argc, char *argv[]) // Simplest cases: serialize the threads, and also used pinned memory: // This verifies that the sub-calls to memcpytest2 are correct. - multiThread_1(true, true); + multiThread_1(true, true); // Serialize, but use unpinned memory to stress the unpinned memory xfer path. multiThread_1(true, false); diff --git a/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp b/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp index 4f73b67ad7..9b2e749cf9 100644 --- a/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp +++ b/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp @@ -41,8 +41,8 @@ void printSep() // Designed to stress a small number of simple smoke tests template< - typename T=float, - class P=HipTest::Unpinned, + typename T=float, + class P=HipTest::Unpinned, class C=HipTest::Memcpy > void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) diff --git a/hipamd/tests/src/runtimeApi/stream/hipNullStream.cpp b/hipamd/tests/src/runtimeApi/stream/hipNullStream.cpp index b610315608..af5547e3fb 100644 --- a/hipamd/tests/src/runtimeApi/stream/hipNullStream.cpp +++ b/hipamd/tests/src/runtimeApi/stream/hipNullStream.cpp @@ -119,7 +119,7 @@ void Streamer::reset() { HipTest::setDefaultData(_numElements, _A_h, _B_h, _C_h); H2D(); - + } @@ -238,7 +238,7 @@ int main(int argc, char *argv[]) nullStreamer->D2H(); HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); + HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); } } @@ -263,7 +263,7 @@ int main(int argc, char *argv[]) HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); + HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); } } @@ -289,10 +289,10 @@ int main(int argc, char *argv[]) // Copy with stream1, this could go async if the streamSync doesn't synchronize ALL the streams. HIPCHECK(hipMemcpyAsync(streamers[0]->_C_h, streamers[0]->_C_d, streamers[0]->_numElements*sizeof(int), hipMemcpyDeviceToHost, streamers[1]->_stream)); - + HIPCHECK(hipDeviceSynchronize()); - HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements); + HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements); } diff --git a/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp b/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp index c6a58ce7d4..4c49d80c05 100644 --- a/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp +++ b/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp @@ -59,23 +59,23 @@ const char *syncModeString(int syncMode) { void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch) { - // This test sends a long-running kernel to the null stream, then tests to see if the + // This test sends a long-running kernel to the null stream, then tests to see if the // specified synchronization technique is effective. // - // Some syncMode are not expected to correctly sync (for example "syncNone"). in these + // Some syncMode are not expected to correctly sync (for example "syncNone"). in these // cases the test sets expectMismatch and the check logic below will attempt to ensure that // the undesired synchronization did not occur - ie ensure the kernel is still running and did // not yet update the stop event. This can be tricky since if the kernel runs fast enough it - // may complete before the check. To prevent this, the addCountReverse has a count parameter - // which causes it to loop repeatedly, and the results are checked in reverse order. + // may complete before the check. To prevent this, the addCountReverse has a count parameter + // which causes it to loop repeatedly, and the results are checked in reverse order. // // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results // are checked and we test to make sure stop event has completed. - + if (!(testMask & p_tests)) { return; } - printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", + printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", testMask, syncModeString(syncMode), expectMismatch); size_t sizeBytes = numElements * sizeof(int); @@ -98,7 +98,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); // Launch kernel into null stream, should result in C_h == count. hipLaunchKernelGGL(HipTest::addCountReverse , dim3(blocks), dim3(threadsPerBlock), 0, 0 /*stream*/, C_d, C_h, numElements, count); - HIPCHECK(hipEventRecord(stop, 0/*default*/)); + HIPCHECK(hipEventRecord(stop, 0/*default*/)); switch (syncMode) { case syncNone: @@ -108,18 +108,18 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode s break; case syncOtherStream: // Does this synchronize with the null stream? - HIPCHECK(hipStreamSynchronize(otherStream)); + HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncMarkerThenOtherStream: case syncMarkerThenOtherNonBlockingStream: - - // this may wait for NULL stream depending hipStreamNonBlocking flag above - HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); - HIPCHECK(hipStreamSynchronize(otherStream)); + // this may wait for NULL stream depending hipStreamNonBlocking flag above + HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); + + HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncDevice: - HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); break; default: assert(0); @@ -197,7 +197,7 @@ void runTests(int64_t numElements) int main(int argc, char *argv[]) { // Can' destroy the default stream:// TODO - move to another test - HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle); + HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle); HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/); diff --git a/hipamd/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/hipamd/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index f5b1b79550..cf463be76a 100644 --- a/hipamd/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/hipamd/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -88,7 +88,7 @@ private: template Streamer::Streamer(int deviceId, T * A_d, size_t numElements, int commandType) : - _preA_d(NULL), + _preA_d(NULL), _A_d(A_d), _deviceId(deviceId), _numElements(numElements), @@ -239,7 +239,7 @@ size_t Streamer::check(int streamerNum, T initValue, T expectedOffset, bool e return _mismatchCount; } - + //--- //Parse arguments specific to this test. @@ -300,7 +300,7 @@ void checkAll(int initValue, std::vector &streamers, std::vector< for (int i=0; iexpectedAdd(); - + mismatchCount += streamers[i]->check(i+1, initValue, expected, expectPass); } @@ -330,7 +330,7 @@ void checkAll(int initValue, std::vector &streamers, std::vector< void sync_none(void) {}; -void sync_allDevices(int numDevices) +void sync_allDevices(int numDevices) { for (int d=0; d streamers) +void sync_queryAllUntilComplete(std::vector streamers) { for (int i=streamers.size()-1; i>=0; i--) { streamers[i]->queryUntilComplete(); @@ -347,7 +347,7 @@ void sync_queryAllUntilComplete(std::vector streamers) } -void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere) +void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, bool waitHere) { HIPCHECK(hipSetDevice(sideDeviceId)); @@ -389,7 +389,7 @@ int main(int argc, char *argv[]) initArray_h[i] = initValue; } HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice)); - + int numDevices; HIPCHECK(hipGetDeviceCount(&numDevices)); @@ -414,7 +414,7 @@ int main(int argc, char *argv[]) // A sideband stream channel that is independent from above. - // Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is + // Used to check to ensure the WaitEvent or other synchronization is working correctly since by default sideStream is // asynchronous wrt the other streams. std::vector sideStreams; for (int d=0; d Test 0x1000 simple null stream tests\n"); + printf ("==> Test 0x1000 simple null stream tests\n"); // try some null stream: hipStreamQuery(0); @@ -463,7 +463,7 @@ int main(int argc, char *argv[]) HIPCHECK(hipEventRecord(e1, s1)) HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/)); - + HIPCHECK(hipStreamDestroy(s1)); HIPCHECK(hipEventDestroy(e1)); } @@ -476,11 +476,11 @@ int main(int argc, char *argv[]) HIPCHECK(hipEventRecord(e1, hipStream_t(0))) HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/)); - + HIPCHECK(hipStreamDestroy(s1)); HIPCHECK(hipEventDestroy(e1)); } - + } From e434a23be55f627be063e5c6618cb9fdc6493fd0 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 13 Nov 2017 08:56:39 +0530 Subject: [PATCH 37/52] Fix module_api sample --- hipamd/samples/0_Intro/module_api/Makefile | 6 +++--- .../0_Intro/module_api/launchKernelHcc.cpp | 1 + hipamd/samples/0_Intro/module_api/runKernel.cpp | 1 + hipamd/samples/0_Intro/module_api/test.cl | 12 ------------ hipamd/samples/0_Intro/module_api/test.co | Bin 9824 -> 0 bytes 5 files changed, 5 insertions(+), 15 deletions(-) delete mode 100644 hipamd/samples/0_Intro/module_api/test.cl delete mode 100755 hipamd/samples/0_Intro/module_api/test.co diff --git a/hipamd/samples/0_Intro/module_api/Makefile b/hipamd/samples/0_Intro/module_api/Makefile index 38bd00a6a6..270d4c1211 100644 --- a/hipamd/samples/0_Intro/module_api/Makefile +++ b/hipamd/samples/0_Intro/module_api/Makefile @@ -5,7 +5,7 @@ endif HIPCC=$(HIP_PATH)/bin/hipcc HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) -all: vcpy_kernel.code runKernel.hip.out defaultDriver.hip.out +all: vcpy_kernel.code runKernel.hip.out launchKernelHcc.hip.out runKernel.hip.out: runKernel.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ @@ -13,8 +13,8 @@ runKernel.hip.out: runKernel.cpp launchKernelHcc.hip.out: launchKernelHcc.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ -defaultDriver.hip.out: defaultDriver.cpp - $(HIPCC) $(HIPCC_FLAGS) $< -o $@ +#defaultDriver.hip.out: defaultDriver.cpp +# $(HIPCC) $(HIPCC_FLAGS) $< -o $@ vcpy_kernel.code: vcpy_kernel.cpp $(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@ diff --git a/hipamd/samples/0_Intro/module_api/launchKernelHcc.cpp b/hipamd/samples/0_Intro/module_api/launchKernelHcc.cpp index e86e44cb24..7c90198b6a 100644 --- a/hipamd/samples/0_Intro/module_api/launchKernelHcc.cpp +++ b/hipamd/samples/0_Intro/module_api/launchKernelHcc.cpp @@ -72,6 +72,7 @@ int main(){ uint32_t one = 1; struct { + uint32_t _hidden[6]; void * _Ad; void * _Bd; } args; diff --git a/hipamd/samples/0_Intro/module_api/runKernel.cpp b/hipamd/samples/0_Intro/module_api/runKernel.cpp index 2759c02140..fb34f80b7b 100644 --- a/hipamd/samples/0_Intro/module_api/runKernel.cpp +++ b/hipamd/samples/0_Intro/module_api/runKernel.cpp @@ -68,6 +68,7 @@ int main(){ uint32_t one = 1; struct { + uint32_t _hidden[6]; void * _Ad; void * _Bd; } args; diff --git a/hipamd/samples/0_Intro/module_api/test.cl b/hipamd/samples/0_Intro/module_api/test.cl deleted file mode 100644 index 81b20cab0e..0000000000 --- a/hipamd/samples/0_Intro/module_api/test.cl +++ /dev/null @@ -1,12 +0,0 @@ -__kernel void memset(char in, __global int* out) { -int tx = get_global_id(0); -out[tx] = in; -} - - -__kernel void vadd(__global float *Ad, __global float *Bd, __global float *Cd, int N){ -int tx = get_global_id(0); -if(tx < N){ -Cd[tx] = Ad[tx] + Bd[tx]; -} -} diff --git a/hipamd/samples/0_Intro/module_api/test.co b/hipamd/samples/0_Intro/module_api/test.co deleted file mode 100755 index a3e6b991e39e55b9967ae08c86be94a583ce0dbe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9824 zcmeGi+in|0bi8q#khCh|B5|uUq7Q9R;_R+>H@ikcag#Q!>Xh`B3Zc@Bch`yO`hvZV z8c^LpLG*zK-~;#qzJcbkyp=xZp)W{%;2VH*c4oX@lU6MRTGTy~_spEjoHOT~nZ1l} z950_xGMPCdV1(Q!8En7E33D8g)kC5IXpY!q9IpGw1j#`^8__s_jw9f5*p3DxMB;Hl zM>6U?3@%S_j>OU*)Eh;X@NXF38R2>oOFrtMuY}?SfJWCoUKL%5rJe%xC()DhJNIko zMf0O#;32#!$kGn_hj143B7401*jKa_*+YjBhVX+iPL^249x?ii_Df3(ks7v<{wDk| zoJh}-u}BWOeHzdgmoIYJ?_qo#2W6+Hrzbw6tnTHP7FPUrr`Bu~>4#LGq8hzC6){eA z*r`UfS)^P}bT6?DzuIm#!t=FH{UWuDgmAg(vS7)tH`|+nZ<+-wA#(g;XW4JhUo6t3 z>Jqz#V6$>3okX>R~OsOZi|nutA?ErJHNPmt`r1Kcd^YH-2fcalGZOY zLl&Ir)={uiq_&zgQ}VpF-|4VQ;KxFm3Sq%ei4d&(#Yl1{(gwMRl3lQhSY$ zPXVM2Ao8e!B+eH4rJ%M3brBOz*BV}tikN}G3Jbdah0PWQy03+O?y9@a+I@1#bM>4vFQ~;}Q16q1nEP0f(G_FvEDp zdo-Z)5Dz0CW%36LY=e0o*ZEhoA|si)SgkcMk*J>)*g9D)BtEB_2fZULMh3;u?1Z9iIpA zhh6!I!6n3%-$4kj^8`KZq!CCXkVYVlz%vyA+{gJ3cYN5{#XkQqCrjiLfEcVMCSZnf+KJ%R4w0R2tOC@(z5D$wWS6@Ug44gVlun9{K%iLY{sdMXsioGy-V^ z(g>sxNF$I&AdNs8fiwbX1fJ;#$g}1LFK{;}g}3<5gAkpy16~%d`|_=Sf5a7M&d^pR zL(u!^Tf5VcuNGp1clS53#5wk_3hJ#SjG zOx18bdX(%o%#rmlY;}q=Gi$YQy<5q<&HBta=K0M=bZFVEx4NM}6KUo_glcZZa~$2Q zT2|Gr*sf{m%=dJM`F_PTD_*6l8LDBaN9p5@90VKnXkbG(w1VbY1&>twb`of+XcJoR8-Y44cjz*%hoJI_x&fHhV8hj zrMX^3ovr!>rn_pjU>H`V;5eq~!FhY7QgwYtJxa>8##O&vq~-DgPia5pyEX7T16EV{ zUlPV(!lv(9^Ao8&|P zS{U_Mz2=fU|3gFG?RYF?1Z%18H$VV1LskL2-U-{h#W6W{OuM2*SDg%=d|X-h;7n=h z)O^zUztO|EVT_`$N2o6w0?Yq%APWKiw=oE)i)&h{$#pLANg<4SXbZn>A;|TA0NS_* zNPW4^C8oebkbG%h;#Yx(=S7k)_YsNZK7wN)@H*jbydD%ZxlhS+DvA65^N}h;0~`W6 z>Y@)3&?H_JUFol!zxLu>A%ch8EE~v3t2?>h5knaG;9vrUjf=~`tM<20mLx81J_~wKLJkil%fCt From 4353fc06d7f8dbb48b727d1b500c336530b090f8 Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Sat, 11 Nov 2017 10:59:26 +0000 Subject: [PATCH 38/52] Add a couple of missing CHECK directives to concurrentKernels.cu --- hipamd/tests/hipify-clang/concurentKernels.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hipamd/tests/hipify-clang/concurentKernels.cu b/hipamd/tests/hipify-clang/concurentKernels.cu index e369baaf3e..27e9e0d0e7 100644 --- a/hipamd/tests/hipify-clang/concurentKernels.cu +++ b/hipamd/tests/hipify-clang/concurentKernels.cu @@ -99,6 +99,7 @@ int main(int argc, char **argv) // use command-line specified CUDA device, otherwise use device with highest Gflops/s cuda_device = findCudaDevice(argc, (const char **)argv); + // CHECK: hipDeviceProp_t deviceProp; cudaDeviceProp deviceProp; // CHECK: checkCudaErrors(hipGetDevice(&cuda_device)); checkCudaErrors(cudaGetDevice(&cuda_device)); @@ -135,6 +136,7 @@ int main(int argc, char **argv) checkCudaErrors(cudaStreamCreate(&(streams[i]))); } + // CHECK: hipEvent_t start_event, stop_event; // create CUDA event handles cudaEvent_t start_event, stop_event; From 0afc6b2e9584955852b5db0e0b74ad2d6ac16b6f Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Sat, 11 Nov 2017 11:00:40 +0000 Subject: [PATCH 39/52] Add a test that exposes #260 --- hipamd/tests/hipify-clang/allocators.cu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 hipamd/tests/hipify-clang/allocators.cu diff --git a/hipamd/tests/hipify-clang/allocators.cu b/hipamd/tests/hipify-clang/allocators.cu new file mode 100644 index 0000000000..3f130be227 --- /dev/null +++ b/hipamd/tests/hipify-clang/allocators.cu @@ -0,0 +1,18 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +#pragma once + +#include + + +/** + * Allocate GPU memory for `count` elements of type `T`. + */ +template +static T* gpuMalloc(size_t count) { + T* ret = nullptr; + // CHECK: hipMalloc(&ret, count * sizeof(T)); + cudaMalloc(&ret, count * sizeof(T)); + return ret; +} + From 9014c89f049fc8cda9369d7a3bc2c14fef6f53b5 Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Mon, 13 Nov 2017 17:20:07 +0000 Subject: [PATCH 40/52] Add hipify mappings for all CUDA headers that have HIP equivalents I'm particularly running into issues with `device_types.h` in real CUDA code... --- hipamd/hipify-clang/src/CUDA2HipMap.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hipamd/hipify-clang/src/CUDA2HipMap.cpp b/hipamd/hipify-clang/src/CUDA2HipMap.cpp index de6ddb2d74..b2e5251139 100644 --- a/hipamd/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipamd/hipify-clang/src/CUDA2HipMap.cpp @@ -312,9 +312,16 @@ const std::map CUDA_TYPE_NAME_MAP{ /// Maps cuda header names to hip header names. const std::map CUDA_INCLUDE_MAP{ // CUDA includes - {"cuda.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}}, - {"cuda_runtime.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}}, - {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", CONV_INCLUDE, API_RUNTIME}}, + {"cuda.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}}, + {"cuda_runtime.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}}, + {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", CONV_INCLUDE, API_RUNTIME}}, + {"channel_descriptor.h", {"hip/channel_descriptor.h", CONV_INCLUDE, API_RUNTIME}}, + {"device_functions.h", {"hip/device_functions.h", CONV_INCLUDE, API_RUNTIME}}, + {"driver_types.h", {"hip/driver_types.h", CONV_INCLUDE, API_RUNTIME}}, + {"cuComplex.h", {"hip/hip_complex.h", CONV_INCLUDE, API_RUNTIME}}, + {"cuda_fp16.h", {"hip/hip_fp16.h", CONV_INCLUDE, API_RUNTIME}}, + {"cuda_texture_types.h", {"hip/hip_texture_types.h", CONV_INCLUDE, API_RUNTIME}}, + {"vector_types.h", {"hip/hip_vector_types.h", CONV_INCLUDE, API_RUNTIME}}, // CUBLAS includes {"cublas.h", {"hipblas.h", CONV_INCLUDE, API_BLAS}}, From 7a441aef106826b8e94bf8f4f75e375091811845 Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Mon, 30 Oct 2017 03:47:50 +0000 Subject: [PATCH 41/52] Use a custom FrontendAction to simplify identifier translation Most of what hipify does is really just replacing CUDA idenitifers with HIP ones. CUDA function calls, preprocessor macro calls, enum references, types, etc. This is problematic: calls/types/enum-refs require name resolution for the AST matcher to work. This fails in the presence of code deleted by the preprocessor, and in two-pass template compilation. Instead, we can simply hook the lexer and have it rewrite the identifiers for us. This approach means identifier transformations will work correctly regardless of where they appear (and we get to delete lots of code) - Fixes #260 - Helps a bit with #207 - it will still fail to translate kernel calls in preprocessor-ignored code, but everything except kerel launches should translate correctly now, even in preprocessor-deleted code. --- hipamd/hipify-clang/src/ArgParse.cpp | 40 + hipamd/hipify-clang/src/ArgParse.h | 19 + hipamd/hipify-clang/src/CUDA2HipMap.cpp | 1 - hipamd/hipify-clang/src/Cuda2Hip.cpp | 861 ------------------ hipamd/hipify-clang/src/HipifyAction.cpp | 457 ++++++++++ hipamd/hipify-clang/src/HipifyAction.h | 83 ++ .../src/ReplacementsFrontendActionFactory.h | 28 + hipamd/hipify-clang/src/main.cpp | 155 ++++ 8 files changed, 782 insertions(+), 862 deletions(-) create mode 100644 hipamd/hipify-clang/src/ArgParse.cpp create mode 100644 hipamd/hipify-clang/src/ArgParse.h delete mode 100644 hipamd/hipify-clang/src/Cuda2Hip.cpp create mode 100644 hipamd/hipify-clang/src/HipifyAction.cpp create mode 100644 hipamd/hipify-clang/src/HipifyAction.h create mode 100644 hipamd/hipify-clang/src/ReplacementsFrontendActionFactory.h create mode 100644 hipamd/hipify-clang/src/main.cpp diff --git a/hipamd/hipify-clang/src/ArgParse.cpp b/hipamd/hipify-clang/src/ArgParse.cpp new file mode 100644 index 0000000000..b27643710f --- /dev/null +++ b/hipamd/hipify-clang/src/ArgParse.cpp @@ -0,0 +1,40 @@ +#include "ArgParse.h" + +cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); + +cl::opt OutputFilename("o", + cl::desc("Output filename"), + cl::value_desc("filename"), + cl::cat(ToolTemplateCategory)); + +cl::opt Inplace("inplace", + cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"), + cl::value_desc("inplace"), + cl::cat(ToolTemplateCategory)); + +cl::opt NoBackup("no-backup", + cl::desc("Don't create a backup file for the hipified source"), + cl::value_desc("no-backup"), + cl::cat(ToolTemplateCategory)); + +cl::opt NoOutput("no-output", + cl::desc("Don't write any translated output to stdout"), + cl::value_desc("no-output"), + cl::cat(ToolTemplateCategory)); + +cl::opt PrintStats("print-stats", + cl::desc("Print translation statistics"), + cl::value_desc("print-stats"), + cl::cat(ToolTemplateCategory)); + +cl::opt OutputStatsFilename("o-stats", + cl::desc("Output filename for statistics"), + cl::value_desc("filename"), + cl::cat(ToolTemplateCategory)); + +cl::opt Examine("examine", + cl::desc("Combines -no-output and -print-stats options"), + cl::value_desc("examine"), + cl::cat(ToolTemplateCategory)); + +cl::extrahelp CommonHelp(ct::CommonOptionsParser::HelpMessage); diff --git a/hipamd/hipify-clang/src/ArgParse.h b/hipamd/hipify-clang/src/ArgParse.h new file mode 100644 index 0000000000..b937a8dd15 --- /dev/null +++ b/hipamd/hipify-clang/src/ArgParse.h @@ -0,0 +1,19 @@ +#pragma once + +#include "clang/Tooling/CommonOptionsParser.h" +#include "llvm/Support/CommandLine.h" + +namespace cl = llvm::cl; +namespace ct = clang::tooling; + +extern cl::OptionCategory ToolTemplateCategory; + +extern cl::opt OutputFilename; +extern cl::opt Inplace; +extern cl::opt NoBackup; +extern cl::opt NoOutput; +extern cl::opt PrintStats; +extern cl::opt OutputStatsFilename; +extern cl::opt Examine; + +extern cl::extrahelp CommonHelp; diff --git a/hipamd/hipify-clang/src/CUDA2HipMap.cpp b/hipamd/hipify-clang/src/CUDA2HipMap.cpp index de6ddb2d74..650fac2079 100644 --- a/hipamd/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipamd/hipify-clang/src/CUDA2HipMap.cpp @@ -2677,7 +2677,6 @@ const std::map& CUDA_RENAMES_MAP() { // First run, so compute the union map. ret = CUDA_IDENTIFIER_MAP; - ret.insert(CUDA_INCLUDE_MAP.begin(), CUDA_INCLUDE_MAP.end()); ret.insert(CUDA_TYPE_NAME_MAP.begin(), CUDA_TYPE_NAME_MAP.end()); return ret; diff --git a/hipamd/hipify-clang/src/Cuda2Hip.cpp b/hipamd/hipify-clang/src/Cuda2Hip.cpp deleted file mode 100644 index a1cf80fde9..0000000000 --- a/hipamd/hipify-clang/src/Cuda2Hip.cpp +++ /dev/null @@ -1,861 +0,0 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/** - * @file Cuda2Hip.cpp - * - * This file is compiled and linked into clang based hipify tool. - */ -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Lex/Lexer.h" -#include "clang/Lex/MacroArgs.h" -#include "clang/Lex/MacroInfo.h" -#include "clang/Lex/PPCallbacks.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Rewrite/Core/Rewriter.h" -#include "clang/Tooling/CommonOptionsParser.h" -#include "clang/Tooling/Refactoring.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Signals.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "CUDA2HipMap.h" -#include "LLVMCompat.h" -#include "StringUtils.h" - -using namespace clang; -using namespace clang::ast_matchers; -using namespace clang::tooling; -using namespace llvm; - -#define DEBUG_TYPE "cuda2hip" - - -// Set up the command line options -static cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); - -static cl::opt OutputFilename("o", - cl::desc("Output filename"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -static cl::opt Inplace("inplace", - cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"), - cl::value_desc("inplace"), - cl::cat(ToolTemplateCategory)); - -static cl::opt NoBackup("no-backup", - cl::desc("Don't create a backup file for the hipified source"), - cl::value_desc("no-backup"), - cl::cat(ToolTemplateCategory)); - -static cl::opt NoOutput("no-output", - cl::desc("Don't write any translated output to stdout"), - cl::value_desc("no-output"), - cl::cat(ToolTemplateCategory)); - -static cl::opt PrintStats("print-stats", - cl::desc("Print translation statistics"), - cl::value_desc("print-stats"), - cl::cat(ToolTemplateCategory)); - -static cl::opt OutputStatsFilename("o-stats", - cl::desc("Output filename for statistics"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -static cl::opt Examine("examine", - cl::desc("Combines -no-output and -print-stats options"), - cl::value_desc("examine"), - cl::cat(ToolTemplateCategory)); - -static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); - -class Cuda2Hip { -public: - Cuda2Hip(Replacements& R, const std::string &srcFileName) : - Replace(R), mainFileName(srcFileName) {} - - enum msgTypes { - HIPIFY_ERROR = 0, - HIPIFY_WARNING - }; - - std::string getMsgType(msgTypes type) { - switch (type) { - case HIPIFY_ERROR: return "error"; - default: - case HIPIFY_WARNING: return "warning"; - } - } - -protected: - Replacements& Replace; - std::string mainFileName; - - virtual void insertReplacement(const Replacement &rep, const FullSourceLoc &fullSL) { - llcompat::insertReplacement(Replace, rep); - if (PrintStats) { - rep.getLength(); - Statistics::current().lineTouched(fullSL.getExpansionLineNumber()); - Statistics::current().bytesChanged(rep.getLength()); - } - } - void insertHipHeaders(Cuda2Hip *owner, const SourceManager &SM) { - if (Replace.size() > 0) { - std::string repName = "#include "; - Statistics::current().incrementCounter({repName, ConvTypes::CONV_INCLUDE_CUDA_MAIN_H, ApiTypes::API_RUNTIME}, "#include "); - SourceLocation sl = SM.getLocForStartOfFile(SM.getMainFileID()); - FullSourceLoc fullSL(sl, SM); - Replacement Rep(SM, sl, 0, repName + "\n"); - insertReplacement(Rep, fullSL); - } - } - - void printHipifyMessage(const SourceManager &SM, const SourceLocation &sl, const std::string &message, msgTypes msgType = HIPIFY_WARNING) { - FullSourceLoc fullSL(sl, SM); - llvm::errs() << "[HIPIFY] " << getMsgType(msgType) << ": " << mainFileName << ":" << fullSL.getExpansionLineNumber() << ":" << fullSL.getExpansionColumnNumber() << ": " << message << "\n"; - } - - void processString(StringRef s, SourceManager &SM, SourceLocation start) { - size_t begin = 0; - while ((begin = s.find("cu", begin)) != StringRef::npos) { - const size_t end = s.find_first_of(" ", begin + 4); - StringRef name = s.slice(begin, end); - const auto found = CUDA_RENAMES_MAP().find(name); - if (found != CUDA_RENAMES_MAP().end()) { - StringRef repName = found->second.hipName; - hipCounter counter = {"[string literal]", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.unsupported}; - Statistics::current().incrementCounter(counter, name.str()); - if (!counter.unsupported) { - SourceLocation sl = start.getLocWithOffset(begin + 1); - Replacement Rep(SM, sl, name.size(), repName); - FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); - } - } - - if (end == StringRef::npos) { - break; - } - begin = end + 1; - } - } -}; - -class Cuda2HipCallback; - -class HipifyPPCallbacks : public PPCallbacks, public SourceFileCallbacks, public Cuda2Hip { -public: - HipifyPPCallbacks(Replacements& R, const std::string &mainFileName) - : Cuda2Hip(R, mainFileName) {} - - virtual bool handleBeginSource(CompilerInstance &CI -#if LLVM_VERSION_MAJOR <= 4 - , StringRef Filename -#endif - ) override { - Preprocessor &PP = CI.getPreprocessor(); - SourceManager &SM = CI.getSourceManager(); - setSourceManager(&SM); - PP.addPPCallbacks(std::unique_ptr(this)); - setPreprocessor(&PP); - return true; - } - - virtual void handleEndSource() override; - - virtual void InclusionDirective(SourceLocation hash_loc, - const Token &include_token, - StringRef file_name, bool is_angled, - CharSourceRange filename_range, - const FileEntry *file, StringRef search_path, - StringRef relative_path, - const clang::Module *imported) override { - if (!_sm->isWrittenInMainFile(hash_loc) || !is_angled) { - return; // We're looking to rewrite angle-includes in the main file to point to hip. - } - - const auto found = CUDA_INCLUDE_MAP.find(file_name); - if (found == CUDA_INCLUDE_MAP.end()) { - // Not a CUDA include - don't touch it. - return; - } - - Statistics::current().incrementCounter(found->second, file_name.str()); - if (found->second.unsupported) { - // An unsupported CUDA header? Oh dear. Print a warning. - printHipifyMessage(*_sm, hash_loc, "Unsupported CUDA header used: " + file_name.str()); - return; - } - - StringRef repName = found->second.hipName; - DEBUG(dbgs() << "Include file found: " << file_name << "\n" - << "SourceLocation: " - << filename_range.getBegin().printToString(*_sm) << "\n" - << "Will be replaced with " << repName << "\n"); - SourceLocation sl = filename_range.getBegin(); - SourceLocation sle = filename_range.getEnd(); - const char *B = _sm->getCharacterData(sl); - const char *E = _sm->getCharacterData(sle); - SmallString<128> tmpData; - Replacement Rep(*_sm, sl, E - B, Twine("<" + repName + ">").toStringRef(tmpData)); - FullSourceLoc fullSL(sl, *_sm); - insertReplacement(Rep, fullSL); - } - - /** - * Look at, and consider altering, a given token. - * - * If it's not a CUDA identifier, nothing happens. - * If it's an unsupported CUDA identifier, a warning is emitted. - * Otherwise, the source file is updated with the corresponding hipification. - */ - void RewriteToken(Token t) { - // String literals containing CUDA references need fixing... - if (t.is(tok::string_literal)) { - StringRef s(t.getLiteralData(), t.getLength()); - processString(unquoteStr(s), *_sm, t.getLocation()); - return; - } else if (!t.isAnyIdentifier()) { - // If it's neither a string nor an identifier, we don't care. - return; - } - - StringRef name = t.getIdentifierInfo()->getName(); - const auto found = CUDA_RENAMES_MAP().find(name); - if (found == CUDA_RENAMES_MAP().end()) { - // So it's an identifier, but not CUDA? Boring. - return; - } - Statistics::current().incrementCounter(found->second, name.str()); - - SourceLocation sl = t.getLocation(); - if (found->second.unsupported) { - // An unsupported identifier? Curses! Warn the user. - printHipifyMessage(*_sm, sl, "Unsupported CUDA identifier used: " + name.str()); - return; - } - - StringRef repName = found->second.hipName; - Replacement Rep(*_sm, sl, name.size(), repName); - FullSourceLoc fullSL(sl, *_sm); - insertReplacement(Rep, fullSL); - } - - virtual void MacroDefined(const Token &MacroNameTok, - const MacroDirective *MD) override { - if (!_sm->isWrittenInMainFile(MD->getLocation()) || - MD->getKind() != MacroDirective::MD_Define) { - return; - } - - for (auto T : MD->getMacroInfo()->tokens()) { - RewriteToken(T); - } - } - - virtual void MacroExpands(const Token &MacroNameTok, - const MacroDefinition &MD, SourceRange Range, - const MacroArgs *Args) override { - - if (!_sm->isWrittenInMainFile(MacroNameTok.getLocation())) { - return; // Macros in headers are not our concern. - } - - // Is the macro itself a CUDA identifier? If so, rewrite it - RewriteToken(MacroNameTok); - - // If it's a macro with arguments, rewrite all the arguments as hip, too. - for (unsigned int i = 0; Args && i < MD.getMacroInfo()->GET_NUM_ARGS(); i++) { - std::vector toks; - // Code below is a kind of stolen from 'MacroArgs::getPreExpArgument' - // to workaround the 'const' MacroArgs passed into this hook. - const Token *start = Args->getUnexpArgument(i); - size_t len = Args->getArgLength(start) + 1; - llcompat::EnterPreprocessorTokenStream(*_pp, start, len, false); - - do { - toks.push_back(Token()); - Token &tk = toks.back(); - _pp->Lex(tk); - } while (toks.back().isNot(tok::eof)); - - _pp->RemoveTopOfLexerStack(); - // end of stolen code - - for (auto tok : toks) { - RewriteToken(tok); - } - } - } - - void EndOfMainFile() override {} - - void setSourceManager(SourceManager *sm) { _sm = sm; } - void setPreprocessor(Preprocessor *pp) { _pp = pp; } - void setMatch(Cuda2HipCallback *match) { Match = match; } - -private: - SourceManager *_sm = nullptr; - Preprocessor *_pp = nullptr; - Cuda2HipCallback *Match = nullptr; -}; - -class Cuda2HipCallback : public MatchFinder::MatchCallback, public Cuda2Hip { -private: - bool cudaCall(const MatchFinder::MatchResult &Result) { - const CallExpr *call = Result.Nodes.getNodeAs("cudaCall"); - if (!call) { - return false; // Another handler will do it. - } - - const FunctionDecl *funcDcl = call->getDirectCallee(); - std::string name = funcDcl->getDeclName().getAsString(); - SourceManager *SM = Result.SourceManager; - SourceLocation sl = call->getLocStart(); - - // TODO: Make a lookup table just for functions to improve performance. - const auto found = CUDA_IDENTIFIER_MAP.find(name); - if (found == CUDA_IDENTIFIER_MAP.end()) { - std::string msg = "the following reference is not handled: '" + name + "' [function call]."; - printHipifyMessage(*SM, sl, msg); - return true; - } - - const hipCounter& hipCtr = found->second; - Statistics::current().incrementCounter(hipCtr, name); - - if (hipCtr.unsupported) { - return true; // Silently fail when you find an unsupported member. - // TODO: Print a warning with the diagnostics API? - } - - size_t length = name.size(); - Replacement Rep(*SM, sl, length, hipCtr.hipName); - FullSourceLoc fullSL(sl, *SM); - insertReplacement(Rep, fullSL); - - return true; - } - - SourceRange getReadRange(clang::SourceManager &SM, const SourceRange &exprRange) { - SourceLocation begin = exprRange.getBegin(); - SourceLocation end = exprRange.getEnd(); - - bool beginSafe = !SM.isMacroBodyExpansion(begin) || Lexer::isAtStartOfMacroExpansion(begin, SM, LangOptions{}); - bool endSafe = !SM.isMacroBodyExpansion(end) || Lexer::isAtEndOfMacroExpansion(end, SM, LangOptions{}); - - if (beginSafe && endSafe) { - return {SM.getFileLoc(begin), SM.getFileLoc(end)}; - } else { - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; - } - } - - SourceRange getWriteRange(clang::SourceManager &SM, const SourceRange &exprRange) { - SourceLocation begin = exprRange.getBegin(); - SourceLocation end = exprRange.getEnd(); - - // If the range is contained within a macro, update the macro definition. - // Otherwise, use the file location and hope for the best. - if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) { - return {SM.getFileLoc(begin), SM.getFileLoc(end)}; - } - - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; - } - - StringRef readSourceText(clang::SourceManager& SM, const SourceRange& exprRange) { - return Lexer::getSourceText(CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, LangOptions(), nullptr); - } - - /** - * Get a string representation of the expression `arg`, unless it's a defaulting function - * call argument, in which case get a 0. Used for building argument lists to kernel calls. - */ - std::string stringifyZeroDefaultedArg(SourceManager& SM, const Expr* arg) { - if (isa(arg)) { - return "0"; - } else { - return readSourceText(SM, arg->getSourceRange()); - } - } - - bool cudaLaunchKernel(const MatchFinder::MatchResult &Result) { - StringRef refName = "cudaLaunchKernel"; - if (const CUDAKernelCallExpr *launchKernel = Result.Nodes.getNodeAs(refName)) { - SmallString<40> XStr; - raw_svector_ostream OS(XStr); - - LangOptions DefaultLangOptions; - SourceManager *SM = Result.SourceManager; - - const Expr& calleeExpr = *(launchKernel->getCallee()); - OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", "; - - // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. - const CallExpr& config = *(launchKernel->getConfig()); - - // Copy the two dimensional arguments verbatim. - OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), "; - OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), "; - - // The stream/memory arguments default to zero if omitted. - OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", "; - OS << stringifyZeroDefaultedArg(*SM, config.getArg(3)); - - // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. - int numArgs = launchKernel->getNumArgs(); - if (numArgs > 0) { - OS << ", "; - - // Start of the first argument. - SourceLocation argStart = launchKernel->getArg(0)->getLocStart(); - - // End of the last argument. - SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd(); - - OS << readSourceText(*SM, {argStart, argEnd}); - } - - OS << ")"; - - SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()}); - SourceLocation launchStart = replacementRange.getBegin(); - SourceLocation launchEnd = replacementRange.getEnd(); - - size_t length = SM->getCharacterData(Lexer::getLocForEndOfToken( - launchEnd, 0, *SM, DefaultLangOptions)) - - SM->getCharacterData(launchStart); - - Replacement Rep(*SM, launchStart, length, OS.str()); - FullSourceLoc fullSL(launchStart, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = {"hipLaunchKernelGGL", ConvTypes::CONV_KERN, ApiTypes::API_RUNTIME}; - Statistics::current().incrementCounter(counter, refName.str()); - return true; - } - return false; - } - - bool cudaBuiltin(const MatchFinder::MatchResult &Result) { - if (const MemberExpr *threadIdx = Result.Nodes.getNodeAs("cudaBuiltin")) { - if (const OpaqueValueExpr *refBase = - dyn_cast(threadIdx->getBase())) { - if (const DeclRefExpr *declRef = - dyn_cast(refBase->getSourceExpr())) { - SourceLocation sl = threadIdx->getLocStart(); - SourceManager *SM = Result.SourceManager; - StringRef name = declRef->getDecl()->getName(); - StringRef memberName = threadIdx->getMemberDecl()->getName(); - size_t pos = memberName.find_first_not_of("__fetch_builtin_"); - memberName = memberName.slice(pos, memberName.size()); - SmallString<128> tmpData; - name = Twine(name + "." + memberName).toStringRef(tmpData); - - // TODO: Make a lookup table just for builtins to improve performance. - const auto found = CUDA_IDENTIFIER_MAP.find(name); - if (found != CUDA_IDENTIFIER_MAP.end()) { - Statistics::current().incrementCounter(found->second, name.str()); - if (!found->second.unsupported) { - StringRef repName = found->second.hipName; - Replacement Rep(*SM, sl, name.size(), repName); - FullSourceLoc fullSL(sl, *SM); - insertReplacement(Rep, fullSL); - } - } else { - std::string msg = "the following reference is not handled: '" + name.str() + "' [builtin]."; - printHipifyMessage(*SM, sl, msg); - } - } - } - return true; - } - return false; - } - - bool cudaEnumConstantRef(const MatchFinder::MatchResult &Result) { - if (const DeclRefExpr *enumConstantRef = Result.Nodes.getNodeAs("cudaEnumConstantRef")) { - StringRef name = enumConstantRef->getDecl()->getName(); - SourceLocation sl = enumConstantRef->getLocStart(); - SourceManager *SM = Result.SourceManager; - - // TODO: Make a lookup table just for enum values to improve performance. - const auto found = CUDA_IDENTIFIER_MAP.find(name); - if (found != CUDA_IDENTIFIER_MAP.end()) { - Statistics::current().incrementCounter(found->second, name.str()); - if (!found->second.unsupported) { - StringRef repName = found->second.hipName; - Replacement Rep(*SM, sl, name.size(), repName); - FullSourceLoc fullSL(sl, *SM); - insertReplacement(Rep, fullSL); - } - } else { - std::string msg = "the following reference is not handled: '" + name.str() + "' [enum constant ref]."; - printHipifyMessage(*SM, sl, msg); - } - return true; - } - return false; - } - - bool cudaType(const MatchFinder::MatchResult& Result) { - const clang::TypeLoc* ret = Result.Nodes.getNodeAs("cudaType"); - if (!ret) { - return false; - } - - // Ignore qualifiers - they don't alter our decision to rename. - clang::UnqualTypeLoc tl = ret->getUnqualifiedLoc(); - const Type& typeObject = *(tl.getTypePtr()); - - std::string typeName = tl.getType().getAsString(); - - // Irritatingly, enum/struct types are identified as `enum/struct `, and unlike most compound - // types (such as pointers or references), there isn't another type node inside. So we have - // to make do with what we've got. There's probably a better way of doing this... - if (typeObject.isEnumeralType()) { - removePrefixIfPresent(typeName, "enum "); - } - if (typeObject.isStructureType()) { - removePrefixIfPresent(typeName, "struct "); - } - - // Do we have a replacement for this type? - const auto found = CUDA_TYPE_NAME_MAP.find(typeName); - if (found == CUDA_TYPE_NAME_MAP.end()) { - return false; - } - - SourceManager &SM = *(Result.SourceManager); - - // Start of the type expression to replace. - SourceLocation sl = tl.getBeginLoc(); - - const hipCounter& hipCtr = found->second; - if (hipCtr.unsupported) { - printHipifyMessage(SM, sl, "Unsupported CUDA '" + typeName); - return false; - } - - // Apply the rename! - Replacement Rep(SM, sl, typeName.size(), hipCtr.hipName); - FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); - - return true; - } - - bool cudaSharedIncompleteArrayVar(const MatchFinder::MatchResult &Result) { - StringRef refName = "cudaSharedIncompleteArrayVar"; - if (const VarDecl *sharedVar = Result.Nodes.getNodeAs(refName)) { - // Example: extern __shared__ uint sRadix1[]; - if (sharedVar->hasExternalFormalLinkage()) { - QualType QT = sharedVar->getType(); - std::string typeName; - if (QT->isIncompleteArrayType()) { - const ArrayType *AT = QT.getTypePtr()->getAsArrayTypeUnsafe(); - QT = AT->getElementType(); - if (QT.getTypePtr()->isBuiltinType()) { - QT = QT.getCanonicalType(); - const BuiltinType *BT = dyn_cast(QT); - if (BT) { - LangOptions LO; - LO.CUDA = true; - PrintingPolicy policy(LO); - typeName = BT->getName(policy); - } - } else { - typeName = QT.getAsString(); - } - } - if (!typeName.empty()) { - SourceLocation slStart = sharedVar->getLocStart(); - SourceLocation slEnd = sharedVar->getLocEnd(); - SourceManager *SM = Result.SourceManager; - size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; - std::string varName = sharedVar->getNameAsString(); - std::string repName = "HIP_DYNAMIC_SHARED(" + typeName + ", " + varName + ")"; - Replacement Rep(*SM, slStart, repLength, repName); - FullSourceLoc fullSL(slStart, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = { "HIP_DYNAMIC_SHARED", ConvTypes::CONV_MEM, ApiTypes::API_RUNTIME }; - Statistics::current().incrementCounter(counter, refName.str()); - } - } - return true; - } - return false; - } - - bool stringLiteral(const MatchFinder::MatchResult &Result) { - if (const clang::StringLiteral *sLiteral = Result.Nodes.getNodeAs("stringLiteral")) { - if (sLiteral->getCharByteWidth() == 1) { - StringRef s = sLiteral->getString(); - SourceManager *SM = Result.SourceManager; - processString(s, *SM, sLiteral->getLocStart()); - } - return true; - } - return false; - } - -public: - Cuda2HipCallback(Replacements& Replace, ast_matchers::MatchFinder *parent, HipifyPPCallbacks *PPCallbacks, const std::string &mainFileName) - : Cuda2Hip(Replace, mainFileName), owner(parent), PP(PPCallbacks) { - PP->setMatch(this); - } - - void run(const MatchFinder::MatchResult &Result) override { - if (cudaType(Result)) return; - if (cudaCall(Result)) return; - if (cudaBuiltin(Result)) return; - if (cudaEnumConstantRef(Result)) return; - if (cudaLaunchKernel(Result)) return; - if (cudaSharedIncompleteArrayVar(Result)) return; - if (stringLiteral(Result)) return; - } - -private: - ast_matchers::MatchFinder *owner; - HipifyPPCallbacks *PP; -}; - -void HipifyPPCallbacks::handleEndSource() { - insertHipHeaders(Match, *_sm); -} - -void addAllMatchers(ast_matchers::MatchFinder &Finder, Cuda2HipCallback *Callback) { - // Rewrite CUDA api calls to hip ones. - Finder.addMatcher( - callExpr( - isExpansionInMainFile(), - callee( - functionDecl( - matchesName("cu.*"), - unless( - // Clang generates structs with functions on them to represent things like - // threadIdx.x. We have other logic to handle those builtins directly, so - // we need to suppress the call-handling. - // We can't handle those directly in the call-handler without special-casing - // it unpleasantly, since the names of the functions are unique only per-struct. - matchesName("__fetch_builtin.*") - ) - ) - ) - ).bind("cudaCall"), - Callback - ); - - // Rewrite all references to CUDA types to their corresponding hip types. - Finder.addMatcher( - typeLoc( - isExpansionInMainFile() - ).bind("cudaType"), - Callback - ); - - // Replace references to CUDA names in string literals with the equivalent hip names. - Finder.addMatcher(stringLiteral(isExpansionInMainFile()).bind("stringLiteral"), Callback); - - // Replace the <<<...>>> language extension with a hip kernel launch - Finder.addMatcher(cudaKernelCallExpr(isExpansionInMainFile()).bind("cudaLaunchKernel"), Callback); - - // Replace cuda builtins. - Finder.addMatcher( - memberExpr( - isExpansionInMainFile(), - hasObjectExpression( - hasType( - cxxRecordDecl( - matchesName("__cuda_builtin_") - ) - ) - ) - ).bind("cudaBuiltin"), - Callback - ); - - // Map CUDA enum _values_ to their hip equivalents. - Finder.addMatcher( - declRefExpr( - isExpansionInMainFile(), - to( - enumConstantDecl( - matchesName("cu.*|CU.*") - ) - ) - ).bind("cudaEnumConstantRef"), - Callback - ); - - Finder.addMatcher( - varDecl( - isExpansionInMainFile(), - allOf( - hasAttr(attr::CUDAShared), - hasType(incompleteArrayType()) - ) - ).bind("cudaSharedIncompleteArrayVar"), - Callback - ); -} - -void copyFile(const std::string& src, const std::string& dst) { - std::ifstream source(src, std::ios::binary); - std::ofstream dest(dst, std::ios::binary); - dest << source.rdbuf(); -} - -int main(int argc, const char **argv) { - llcompat::PrintStackTraceOnErrorSignal(); - - CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::OneOrMore); - std::vector fileSources = OptionsParser.getSourcePathList(); - std::string dst = OutputFilename; - if (!dst.empty() && fileSources.size() > 1) { - llvm::errs() << "[HIPIFY] conflict: -o and multiple source files are specified.\n"; - return 1; - } - - if (NoOutput) { - if (Inplace) { - llvm::errs() << "[HIPIFY] conflict: both -no-output and -inplace options are specified.\n"; - return 1; - } - if (!dst.empty()) { - llvm::errs() << "[HIPIFY] conflict: both -no-output and -o options are specified.\n"; - return 1; - } - } - - if (Examine) { - NoOutput = PrintStats = true; - } - - int Result = 0; - - // Arguments for the Statistics print routines. - std::unique_ptr csv = nullptr; - llvm::raw_ostream* statPrint = nullptr; - if (!OutputStatsFilename.empty()) { - csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); - } - if (PrintStats) { - statPrint = &llvm::errs(); - } - - for (const auto & src : fileSources) { - if (dst.empty()) { - if (Inplace) { - dst = src; - } else { - dst = src + ".hip"; - } - } else if (Inplace) { - llvm::errs() << "[HIPIFY] conflict: both -o and -inplace options are specified.\n"; - return 1; - } - - std::string tmpFile = src + ".hipify-tmp"; - - // Create a copy of the file to work on. When we're done, we'll move this onto the - // output (which may mean overwriting the input, if we're in-place). - // Should we fail for some reason, we'll just leak this file and not corrupt the input. - copyFile(src, tmpFile); - - // Initialise the statistics counters for this file. - Statistics::setActive(src); - - // RefactoringTool operates on the file in-place. Giving it the output path is no good, - // because that'll break relative includes, and we don't want to overwrite the input file. - // So what we do is operate on a copy, which we then move to the output. - RefactoringTool Tool(OptionsParser.getCompilations(), tmpFile); - ast_matchers::MatchFinder Finder; - - // The Replacements to apply to the file `src`. - Replacements& replacementsToUse = llcompat::getReplacements(Tool, tmpFile); - HipifyPPCallbacks* PPCallbacks = new HipifyPPCallbacks(replacementsToUse, tmpFile); - Cuda2HipCallback Callback(replacementsToUse, &Finder, PPCallbacks, tmpFile); - - addAllMatchers(Finder, &Callback); - - auto action = newFrontendActionFactory(&Finder, PPCallbacks); - - Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("--cuda-host-only", ArgumentInsertPosition::BEGIN)); - - // Ensure at least c++11 is used. - Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("-std=c++11", ArgumentInsertPosition::BEGIN)); -#if defined(HIPIFY_CLANG_RES) - Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES)); -#endif - Tool.appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster()); - Result += Tool.run(action.get()); - Tool.clearArgumentsAdjusters(); - - LangOptions DefaultLangOptions; - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts); - DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, &DiagnosticPrinter, false); - - SourceManager SM(Diagnostics, Tool.getFiles()); - - Rewriter Rewrite(SM, DefaultLangOptions); - if (!Tool.applyAllReplacements(Rewrite)) { - DEBUG(dbgs() << "Skipped some replacements.\n"); - } - - // Either move the tmpfile to the output, or remove it. - if (!NoOutput) { - Result += Rewrite.overwriteChangedFiles(); - rename(tmpFile.c_str(), dst.c_str()); - } else { - remove(tmpFile.c_str()); - } - - Statistics::current().markCompletion(); - Statistics::current().print(csv.get(), statPrint); - - dst.clear(); - } - - if (fileSources.size() > 1) { - Statistics::printAggregate(csv.get(), statPrint); - } - - return Result; -} diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp new file mode 100644 index 0000000000..7608e4040b --- /dev/null +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -0,0 +1,457 @@ +#include "HipifyAction.h" + +#include + +#include "clang/Basic/SourceLocation.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" + +#include "LLVMCompat.h" +#include "CUDA2HipMap.h" +#include "StringUtils.h" +#include "ArgParse.h" + +namespace ct = clang::tooling; +namespace mat = clang::ast_matchers; + +void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) { + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + + size_t begin = 0; + while ((begin = s.find("cu", begin)) != StringRef::npos) { + const size_t end = s.find_first_of(" ", begin + 4); + StringRef name = s.slice(begin, end); + const auto found = CUDA_RENAMES_MAP().find(name); + if (found != CUDA_RENAMES_MAP().end()) { + StringRef repName = found->second.hipName; + hipCounter counter = {"[string literal]", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.unsupported}; + Statistics::current().incrementCounter(counter, name.str()); + + if (!counter.unsupported) { + clang::SourceLocation sl = start.getLocWithOffset(begin + 1); + ct::Replacement Rep(SM, sl, name.size(), repName); + clang::FullSourceLoc fullSL(sl, SM); + insertReplacement(Rep, fullSL); + } + } + + if (end == StringRef::npos) { + break; + } + + begin = end + 1; + } +} + +/** + * Look at, and consider altering, a given token. + * + * If it's not a CUDA identifier, nothing happens. + * If it's an unsupported CUDA identifier, a warning is emitted. + * Otherwise, the source file is updated with the corresponding hipification. + */ +void HipifyAction::RewriteToken(const clang::Token& t) { + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + + // String literals containing CUDA references need fixing... + if (t.is(clang::tok::string_literal)) { + StringRef s(t.getLiteralData(), t.getLength()); + RewriteString(unquoteStr(s), t.getLocation()); + return; + } else if (!t.isAnyIdentifier()) { + // If it's neither a string nor an identifier, we don't care. + return; + } + + StringRef name = t.getRawIdentifier(); + const auto found = CUDA_RENAMES_MAP().find(name); + if (found == CUDA_RENAMES_MAP().end()) { + // So it's an identifier, but not CUDA? Boring. + return; + } + + Statistics::current().incrementCounter(found->second, name.str()); + + clang::SourceLocation sl = t.getLocation(); + if (found->second.unsupported) { + // An unsupported identifier? Curses! Warn the user. + llvm::errs() << "Unsupported CUDA identifier used: " + name.str() << "\n"; + return; + } + + StringRef repName = found->second.hipName; + ct::Replacement Rep(SM, sl, name.size(), repName); + clang::FullSourceLoc fullSL(sl, SM); + insertReplacement(Rep, fullSL); +} + +namespace { + +clang::SourceRange getReadRange(clang::SourceManager& SM, const clang::SourceRange& exprRange) { + clang::SourceLocation begin = exprRange.getBegin(); + clang::SourceLocation end = exprRange.getEnd(); + + bool beginSafe = !SM.isMacroBodyExpansion(begin) || clang::Lexer::isAtStartOfMacroExpansion(begin, SM, clang::LangOptions{}); + bool endSafe = !SM.isMacroBodyExpansion(end) || clang::Lexer::isAtEndOfMacroExpansion(end, SM, clang::LangOptions{}); + + if (beginSafe && endSafe) { + return {SM.getFileLoc(begin), SM.getFileLoc(end)}; + } else { + return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; + } +} + + +clang::SourceRange getWriteRange(clang::SourceManager& SM, const clang::SourceRange& exprRange) { + clang::SourceLocation begin = exprRange.getBegin(); + clang::SourceLocation end = exprRange.getEnd(); + + // If the range is contained within a macro, update the macro definition. + // Otherwise, use the file location and hope for the best. + if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) { + return {SM.getFileLoc(begin), SM.getFileLoc(end)}; + } + + return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; +} + + +StringRef readSourceText(clang::SourceManager& SM, const clang::SourceRange& exprRange) { + return clang::Lexer::getSourceText(clang::CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, clang::LangOptions(), nullptr); +} + +/** + * Get a string representation of the expression `arg`, unless it's a defaulting function + * call argument, in which case get a 0. Used for building argument lists to kernel calls. + */ +std::string stringifyZeroDefaultedArg(clang::SourceManager& SM, const clang::Expr* arg) { + if (clang::isa(arg)) { + return "0"; + } else { + return readSourceText(SM, arg->getSourceRange()); + } +} + +} // anonymous namespace + + +void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, + const clang::Token&, + StringRef file_name, + bool is_angled, + clang::CharSourceRange filename_range, + const clang::FileEntry*, StringRef, + StringRef, const clang::Module*) { + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + if (!SM.isWrittenInMainFile(hash_loc)) { + return; + } + + const auto found = CUDA_INCLUDE_MAP.find(file_name); + if (found == CUDA_INCLUDE_MAP.end()) { + // Not a CUDA include - don't touch it. + return; + } + + // Special-casing to avoid duplication of the hip_runtime include. + if (found->second.hipName == "hip/hip_runtime.h") { + if (insertedRuntimeHeader) { + return; + } + + insertedRuntimeHeader = true; + } + + Statistics::current().incrementCounter(found->second, file_name.str()); + + clang::SourceLocation sl = filename_range.getBegin(); + if (found->second.unsupported) { + // An unsupported CUDA header? Oh dear. Print a warning. + clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics(); + DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header")); + return; + } + + const char *B = SM.getCharacterData(sl); + const char *E = SM.getCharacterData(filename_range.getEnd()); + clang::SmallString<128> includeBuffer; + clang::StringRef newInclude; + + // Keep the same include type that the user gave. + if (is_angled) { + newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); + } else { + newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer); + } + + ct::Replacement Rep(SM, sl, E - B, newInclude); + insertReplacement(Rep, clang::FullSourceLoc{sl, SM}); +} + + +bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result) { + StringRef refName = "cudaLaunchKernel"; + + const auto* launchKernel = Result.Nodes.getNodeAs(refName); + if (!launchKernel) { + return false; + } + + clang::SmallString<40> XStr; + llvm::raw_svector_ostream OS(XStr); + + clang::LangOptions DefaultLangOptions; + clang::SourceManager* SM = Result.SourceManager; + + const clang::Expr& calleeExpr = *(launchKernel->getCallee()); + OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", "; + + // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. + const clang::CallExpr& config = *(launchKernel->getConfig()); + + // Copy the two dimensional arguments verbatim. + OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), "; + OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), "; + + // The stream/memory arguments default to zero if omitted. + OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", "; + OS << stringifyZeroDefaultedArg(*SM, config.getArg(3)); + + // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. + int numArgs = launchKernel->getNumArgs(); + if (numArgs > 0) { + OS << ", "; + + // Start of the first argument. + clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart(); + + // End of the last argument. + clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd(); + + OS << readSourceText(*SM, {argStart, argEnd}); + } + + OS << ")"; + + clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()}); + clang::SourceLocation launchStart = replacementRange.getBegin(); + clang::SourceLocation launchEnd = replacementRange.getEnd(); + + size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart); + + ct::Replacement Rep(*SM, launchStart, length, OS.str()); + clang::FullSourceLoc fullSL(launchStart, *SM); + insertReplacement(Rep, fullSL); + hipCounter counter = {"hipLaunchKernelGGL", ConvTypes::CONV_KERN, ApiTypes::API_RUNTIME}; + Statistics::current().incrementCounter(counter, refName.str()); + + return true; +} + +bool HipifyAction::cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResult& Result) { + const clang::MemberExpr* threadIdx = Result.Nodes.getNodeAs("cudaBuiltin"); + if (!threadIdx) { + return false; + } + + const clang::OpaqueValueExpr* refBase = clang::dyn_cast(threadIdx->getBase()); + if (!refBase) { + return false; + } + + const clang::DeclRefExpr* declRef = clang::dyn_cast(refBase->getSourceExpr()); + if (!declRef) { + return false; + } + + clang::SourceLocation sl = threadIdx->getLocStart(); + clang::SourceManager* SM = Result.SourceManager; + StringRef name = declRef->getDecl()->getName(); + StringRef memberName = threadIdx->getMemberDecl()->getName(); + size_t pos = memberName.find_first_not_of("__fetch_builtin_"); + memberName = memberName.slice(pos, memberName.size()); + clang::SmallString<128> tmpData; + name = clang::Twine(name + "." + memberName).toStringRef(tmpData); + + const auto found = CUDA_IDENTIFIER_MAP.find(name); + if (found != CUDA_IDENTIFIER_MAP.end()) { + Statistics::current().incrementCounter(found->second, name.str()); + if (!found->second.unsupported) { + StringRef repName = found->second.hipName; + ct::Replacement Rep(*SM, sl, name.size(), repName); + clang::FullSourceLoc fullSL(sl, *SM); + insertReplacement(Rep, fullSL); + } + } else { + std::string msg = "the following reference is not handled: '" + name.str() + "' [builtin]."; + llvm::errs() << msg << "\n"; + } + + return true; +} + +bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result) { + StringRef refName = "cudaSharedIncompleteArrayVar"; + auto* sharedVar = Result.Nodes.getNodeAs(refName); + if (!sharedVar) { + return false; + } + + // Example: extern __shared__ uint sRadix1[]; + if (!sharedVar->hasExternalFormalLinkage()) { + return false; + } + + clang::QualType QT = sharedVar->getType(); + std::string typeName; + if (QT->isIncompleteArrayType()) { + const clang::ArrayType* AT = QT.getTypePtr()->getAsArrayTypeUnsafe(); + QT = AT->getElementType(); + if (QT.getTypePtr()->isBuiltinType()) { + QT = QT.getCanonicalType(); + const auto* BT = clang::dyn_cast(QT); + if (BT) { + clang::LangOptions LO; + LO.CUDA = true; + clang::PrintingPolicy policy(LO); + typeName = BT->getName(policy); + } + } else { + typeName = QT.getAsString(); + } + } + + if (!typeName.empty()) { + clang::SourceLocation slStart = sharedVar->getLocStart(); + clang::SourceLocation slEnd = sharedVar->getLocEnd(); + clang::SourceManager* SM = Result.SourceManager; + size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; + std::string varName = sharedVar->getNameAsString(); + std::string repName = "HIP_DYNAMIC_SHARED(" + typeName + ", " + varName + ")"; + ct::Replacement Rep(*SM, slStart, repLength, repName); + clang::FullSourceLoc fullSL(slStart, *SM); + insertReplacement(Rep, fullSL); + hipCounter counter = {"HIP_DYNAMIC_SHARED", ConvTypes::CONV_MEM, ApiTypes::API_RUNTIME}; + Statistics::current().incrementCounter(counter, refName.str()); + } + + return true; +} + +void HipifyAction::insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL) { + llcompat::insertReplacement(*replacements, rep); + if (PrintStats) { + rep.getLength(); + Statistics::current().lineTouched(fullSL.getExpansionLineNumber()); + Statistics::current().bytesChanged(rep.getLength()); + } +} + +std::unique_ptr HipifyAction::CreateASTConsumer(clang::CompilerInstance& CI, llvm::StringRef) { + Finder.reset(new clang::ast_matchers::MatchFinder); + + // Replace the <<<...>>> language extension with a hip kernel launch + Finder->addMatcher(mat::cudaKernelCallExpr(mat::isExpansionInMainFile()).bind("cudaLaunchKernel"), this); + + // Replace cuda builtins. + Finder->addMatcher( + mat::memberExpr( + mat::isExpansionInMainFile(), + mat::hasObjectExpression( + mat::hasType( + mat::cxxRecordDecl( + mat::matchesName("__cuda_builtin_") + ) + ) + ) + ).bind("cudaBuiltin"), + this + ); + + Finder->addMatcher( + mat::varDecl( + mat::isExpansionInMainFile(), + mat::allOf( + mat::hasAttr(clang::attr::CUDAShared), + mat::hasType(mat::incompleteArrayType()) + ) + ).bind("cudaSharedIncompleteArrayVar"), + this + ); + + // Ownership is transferred to the caller... + return Finder->newASTConsumer(); +} + +void HipifyAction::EndSourceFileAction() { + // Insert the hip header, if we didn't already do it by accident during substitution. + if (!insertedRuntimeHeader) { + // It's not sufficient to just replace CUDA headers with hip ones, because numerous CUDA headers are + // implicitly included by the compiler. Instead, we _delete_ CUDA headers, and unconditionally insert + // one copy of the hip include into every file. + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + + clang::SourceLocation sl = SM.getLocForStartOfFile(SM.getMainFileID()); + clang::FullSourceLoc fullSL(sl, SM); + ct::Replacement Rep(SM, sl, 0, "#include \n"); + insertReplacement(Rep, fullSL); + } + + clang::ASTFrontendAction::EndSourceFileAction(); +} + + +namespace { + +/** + * A silly little class to proxy PPCallbacks back to the HipifyAction class. + */ +class PPCallbackProxy : public clang::PPCallbacks { + HipifyAction& hipifyAction; + +public: + explicit PPCallbackProxy(HipifyAction& action): hipifyAction(action) {} + + void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token& include_token, + StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, + const clang::FileEntry* file, StringRef search_path, StringRef relative_path, + const clang::Module* imported) override { + hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported); + } +}; + +} + +void HipifyAction::ExecuteAction() { + clang::Preprocessor& PP = getCompilerInstance().getPreprocessor(); + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + + // Start lexing the specified input file. + const llvm::MemoryBuffer* FromFile = SM.getBuffer(SM.getMainFileID()); + clang::Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts()); + RawLex.SetKeepWhitespaceMode(true); + + // Perform a token-level rewrite of CUDA identifiers to hip ones. The raw-mode lexer gives us enough + // information to tell the difference between identifiers, string literals, and "other stuff". It also + // ignores preprocessor directives, so this transformation will operate inside preprocessor-deleted + // code. + clang::Token RawTok; + RawLex.LexFromRawLexer(RawTok); + while (RawTok.isNot(clang::tok::eof)) { + RewriteToken(RawTok); + RawLex.LexFromRawLexer(RawTok); + } + + // Register yourself as the preprocessor callback, by proxy. + PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this))); + + // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching. + clang::ASTFrontendAction::ExecuteAction(); +} + +void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Result) { + if (cudaBuiltin(Result)) return; + if (cudaLaunchKernel(Result)) return; + if (cudaSharedIncompleteArrayVar(Result)) return; +} diff --git a/hipamd/hipify-clang/src/HipifyAction.h b/hipamd/hipify-clang/src/HipifyAction.h new file mode 100644 index 0000000000..03d34601f3 --- /dev/null +++ b/hipamd/hipify-clang/src/HipifyAction.h @@ -0,0 +1,83 @@ +#pragma once + +#include "clang/Lex/PPCallbacks.h" +#include "clang/Tooling/Tooling.h" +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "ReplacementsFrontendActionFactory.h" + +namespace ct = clang::tooling; + +/** + * A FrontendAction that hipifies CUDA programs. + */ +class HipifyAction : public clang::ASTFrontendAction, + public clang::ast_matchers::MatchFinder::MatchCallback { +private: + ct::Replacements* replacements; + std::unique_ptr Finder; + + /// CUDA implicitly adds its runtime header. We rewrite explicitly-provided CUDA includes with equivalent + // ones, and track - using this flag - if the result led to us including the hip runtime header. If it did + // not, we insert it at the top of the file when we finish processing it. + // This approach means we do the best it's possible to do w.r.t preserving the user's include order. + bool insertedRuntimeHeader = false; + + /** + * Rewrite a string literal to refer to hip, not CUDA. + */ + void RewriteString(StringRef s, clang::SourceLocation start); + + /** + * Replace a CUDA identifier with the corresponding hip identifier, if applicable. + */ + void RewriteToken(const clang::Token &t); + +public: + explicit HipifyAction(ct::Replacements *replacements): + clang::ASTFrontendAction(), + replacements(replacements) {} + + // MatchCallback listeners + bool cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResult& Result); + bool cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result); + bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result); + + /** + * Called by the preprocessor for each include directive during the non-raw lexing pass. + */ + void InclusionDirective(clang::SourceLocation hash_loc, + const clang::Token &include_token, + StringRef file_name, + bool is_angled, + clang::CharSourceRange filename_range, + const clang::FileEntry *file, + StringRef search_path, + StringRef relative_path, + const clang::Module *imported); + +protected: + /** + * Add a Replacement for the current file. These will all be applied after executing the FrontendAction. + */ + void insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL); + + /** + * FrontendAction entry point. + */ + void ExecuteAction() override; + + /** + * Called at the start of each new file to process. + */ + void EndSourceFileAction() override; + + /** + * MatchCallback API entry point. Called by the AST visitor while searching the AST for things we registered an + * interest for. + */ + void run(const clang::ast_matchers::MatchFinder::MatchResult& Result) override; + + std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef InFile) override; +}; diff --git a/hipamd/hipify-clang/src/ReplacementsFrontendActionFactory.h b/hipamd/hipify-clang/src/ReplacementsFrontendActionFactory.h new file mode 100644 index 0000000000..7896635ef6 --- /dev/null +++ b/hipamd/hipify-clang/src/ReplacementsFrontendActionFactory.h @@ -0,0 +1,28 @@ +#pragma once + +#include "clang/Tooling/Tooling.h" +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/Core/Replacement.h" + +namespace ct = clang::tooling; + + +/** + * A FrontendActionFactory that propagates a set of Replacements into the FrontendAction. + * This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool. + * + * @tparam T The FrontendAction to create. + */ +template +class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory { + ct::Replacements* replacements; + +public: + explicit ReplacementsFrontendActionFactory(ct::Replacements* r): + ct::FrontendActionFactory(), + replacements(r) {} + + clang::FrontendAction* create() override { + return new T(replacements); + } +}; diff --git a/hipamd/hipify-clang/src/main.cpp b/hipamd/hipify-clang/src/main.cpp new file mode 100644 index 0000000000..0cc3594466 --- /dev/null +++ b/hipamd/hipify-clang/src/main.cpp @@ -0,0 +1,155 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/** + * @file Cuda2Hip.cpp + * + * This file is compiled and linked into clang based hipify tool. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "CUDA2HipMap.h" +#include "LLVMCompat.h" +#include "HipifyAction.h" +#include "ArgParse.h" + +#define DEBUG_TYPE "cuda2hip" + +namespace ct = clang::tooling; + + +namespace { + +void copyFile(const std::string& src, const std::string& dst) { + std::ifstream source(src, std::ios::binary); + std::ofstream dest(dst, std::ios::binary); + dest << source.rdbuf(); +} + +} // anonymous namespace + +int main(int argc, const char **argv) { + llcompat::PrintStackTraceOnErrorSignal(); + + ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::OneOrMore); + std::vector fileSources = OptionsParser.getSourcePathList(); + std::string dst = OutputFilename; + if (!dst.empty() && fileSources.size() > 1) { + llvm::errs() << "[HIPIFY] conflict: -o and multiple source files are specified.\n"; + return 1; + } + + if (NoOutput) { + if (Inplace) { + llvm::errs() << "[HIPIFY] conflict: both -no-output and -inplace options are specified.\n"; + return 1; + } + if (!dst.empty()) { + llvm::errs() << "[HIPIFY] conflict: both -no-output and -o options are specified.\n"; + return 1; + } + } + + if (Examine) { + NoOutput = PrintStats = true; + } + + int Result = 0; + + // Arguments for the Statistics print routines. + std::unique_ptr csv = nullptr; + llvm::raw_ostream* statPrint = nullptr; + if (!OutputStatsFilename.empty()) { + csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); + } + if (PrintStats) { + statPrint = &llvm::errs(); + } + + for (const auto & src : fileSources) { + if (dst.empty()) { + if (Inplace) { + dst = src; + } else { + dst = src + ".hip"; + } + } else if (Inplace) { + llvm::errs() << "[HIPIFY] conflict: both -o and -inplace options are specified.\n"; + return 1; + } + + std::string tmpFile = src + ".hipify-tmp"; + + // Create a copy of the file to work on. When we're done, we'll move this onto the + // output (which may mean overwriting the input, if we're in-place). + // Should we fail for some reason, we'll just leak this file and not corrupt the input. + copyFile(src, tmpFile); + + // Initialise the statistics counters for this file. + Statistics::setActive(src); + + // RefactoringTool operates on the file in-place. Giving it the output path is no good, + // because that'll break relative includes, and we don't want to overwrite the input file. + // So what we do is operate on a copy, which we then move to the output. + ct::RefactoringTool Tool(OptionsParser.getCompilations(), tmpFile); + ct::Replacements& replacementsToUse = llcompat::getReplacements(Tool, tmpFile); + + ReplacementsFrontendActionFactory actionFactory(&replacementsToUse); + + Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN)); + + // Ensure at least c++11 is used. + Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-std=c++11", ct::ArgumentInsertPosition::BEGIN)); +#if defined(HIPIFY_CLANG_RES) + Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES)); +#endif + Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster()); + + // Hipify _all_ the things! + if (Tool.runAndSave(&actionFactory)) { + DEBUG(llvm::dbgs() << "Skipped some replacements.\n"); + } + + // Either move the tmpfile to the output, or remove it. + if (!NoOutput) { + rename(tmpFile.c_str(), dst.c_str()); + } else { + remove(tmpFile.c_str()); + } + + Statistics::current().markCompletion(); + Statistics::current().print(csv.get(), statPrint); + + dst.clear(); + } + + if (fileSources.size() > 1) { + Statistics::printAggregate(csv.get(), statPrint); + } + + return Result; +} From 506deca07fd8c38f3a2fc7ae86e7bbfb555d35d7 Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Mon, 13 Nov 2017 16:14:42 +0000 Subject: [PATCH 42/52] Use proper clang diagnostics for printing warnings Much pretty. Very wow This gives users all the usual power when it comes to manipulating clang diagnostics. People can pass -Werror can have hipify fail if it doesn't completely translate a file, for example. Much nicer than reinventing the wheel. --- hipamd/hipify-clang/src/HipifyAction.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index 7608e4040b..192dd00949 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -76,7 +76,9 @@ void HipifyAction::RewriteToken(const clang::Token& t) { clang::SourceLocation sl = t.getLocation(); if (found->second.unsupported) { // An unsupported identifier? Curses! Warn the user. - llvm::errs() << "Unsupported CUDA identifier used: " + name.str() << "\n"; + clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics(); + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier unsupported in hip"); + DE.Report(sl, ID); return; } @@ -284,8 +286,9 @@ bool HipifyAction::cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResu insertReplacement(Rep, fullSL); } } else { - std::string msg = "the following reference is not handled: '" + name.str() + "' [builtin]."; - llvm::errs() << msg << "\n"; + clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics(); + const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unknown CUDA builtin"); + DE.Report(sl, ID); } return true; From e8c1e7bb11cf7fbe3cc38d1d307a9832a4e8779e Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Mon, 13 Nov 2017 16:20:36 +0000 Subject: [PATCH 43/52] Add a preprocessor conditional to one of the tests Hurrah, we can cope with ifdefs now (except for kernel launches) --- hipamd/tests/hipify-clang/axpy.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hipamd/tests/hipify-clang/axpy.cu b/hipamd/tests/hipify-clang/axpy.cu index 2fd62ac344..4dfabff7e7 100644 --- a/hipamd/tests/hipify-clang/axpy.cu +++ b/hipamd/tests/hipify-clang/axpy.cu @@ -33,8 +33,13 @@ int main(int argc, char* argv[]) { // CHECK: hipMalloc(&device_x, kDataLen * sizeof(float)); cudaMalloc(&device_x, kDataLen * sizeof(float)); +#ifdef HERRING // CHECK: hipMalloc(&device_y, kDataLen * sizeof(float)); cudaMalloc(&device_y, kDataLen * sizeof(float)); +#else + // CHECK: hipMalloc(&device_y, kDataLen * sizeof(double)); + cudaMalloc(&device_y, kDataLen * sizeof(double)); +#endif // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); From 642af94492e852352b1a2e63c57e7fedce75c160 Mon Sep 17 00:00:00 2001 From: Chris Kitching Date: Mon, 13 Nov 2017 21:02:42 +0000 Subject: [PATCH 44/52] Add an explicit check for proper rewriting of CUDA includes --- hipamd/tests/hipify-clang/axpy.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hipamd/tests/hipify-clang/axpy.cu b/hipamd/tests/hipify-clang/axpy.cu index 4dfabff7e7..071c503a35 100644 --- a/hipamd/tests/hipify-clang/axpy.cu +++ b/hipamd/tests/hipify-clang/axpy.cu @@ -2,6 +2,8 @@ #include +// CHECK: #include +#include #define TOKEN_PASTE(X, Y) X ## Y #define ARG_LIST_AS_MACRO a, device_x, device_y From e60a95d7ddc1fdf7a36fff7a9f3403758c26f4b9 Mon Sep 17 00:00:00 2001 From: Pierre Date: Mon, 13 Nov 2017 16:13:19 -0500 Subject: [PATCH 45/52] Fix missing MARKER_END Logging status of hipCtxSynchronize was missing Test if hip profiling is active for MARKER_END in ihipPostLaunchKernel Add MARKER_END after the completion of a kernel launched through the "grid launch" --- hipamd/src/grid_launch.cpp | 3 +++ hipamd/src/hip_context.cpp | 2 +- hipamd/src/hip_hcc.cpp | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hipamd/src/grid_launch.cpp b/hipamd/src/grid_launch.cpp index fd5c2a1573..9b3cf509c5 100644 --- a/hipamd/src/grid_launch.cpp +++ b/hipamd/src/grid_launch.cpp @@ -92,5 +92,8 @@ namespace hip_impl delete static_cast(locked_stream); locked_stream = nullptr; + if(HIP_PROFILE_API) { + MARKER_END(); + } } } diff --git a/hipamd/src/hip_context.cpp b/hipamd/src/hip_context.cpp index 69d75e7f31..d3d0691e55 100644 --- a/hipamd/src/hip_context.cpp +++ b/hipamd/src/hip_context.cpp @@ -269,7 +269,7 @@ hipError_t hipCtxGetSharedMemConfig ( hipSharedMemConfig * pConfig ) hipError_t hipCtxSynchronize ( void ) { HIP_INIT_API(1); - return ihipSynchronize(); //TODP Shall check validity of ctx? + return ihipLogStatus(ihipSynchronize()); //TODP Shall check validity of ctx? } hipError_t hipCtxGetFlags ( unsigned int* flags ) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 2d2d2745fe..2eff57f18b 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1592,7 +1592,9 @@ void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launc tprintf(DB_SYNC, "ihipPostLaunchKernel, unlocking stream\n"); stream->lockclose_postKernelCommand(kernelName, lp.av); - MARKER_END(); + if(HIP_PROFILE_API) { + MARKER_END(); + } } //================================================================================================= From 9fa3ed4f6f7082143b91fb01170e1e69dd797ddc Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Tue, 14 Nov 2017 16:37:59 +0530 Subject: [PATCH 46/52] [nvccTests]Enabled inline_asm_vadd on nvcc --- hipamd/tests/src/kernel/inline_asm_vadd.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hipamd/tests/src/kernel/inline_asm_vadd.cpp b/hipamd/tests/src/kernel/inline_asm_vadd.cpp index 7a941d31af..7adca312a9 100644 --- a/hipamd/tests/src/kernel/inline_asm_vadd.cpp +++ b/hipamd/tests/src/kernel/inline_asm_vadd.cpp @@ -39,7 +39,13 @@ __global__ void vadd_asm(hipLaunchParm lp, { int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - asm volatile ("v_add_f32_e32 %0, %1, %2" : "=v" (out[i]) : "v"(in[i]),"v" (out[i])); +#ifdef __HIP_PLATFORM_NVCC__ + asm volatile("add.f32 %0,%1,%2;":"=f"(out[i]):"f"(in[i]),"f"(out[i])); +#endif + +#ifdef __HIP_PLATFORM_HCC__ + asm volatile ("v_add_f32_e32 %0, %1, %2" : "=v" (out[i]) : "v"(in[i]),"v" (out[i])); +#endif } // CPU implementation of Vector Result From 20947f80fba711bc051742a06960131d46653628 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 15 Nov 2017 18:52:59 +0530 Subject: [PATCH 47/52] Fixed half2 issue on gfx900 --- hipamd/bin/hipcc | 1 + hipamd/tests/src/deviceLib/hipTestHalf.cpp | 33 +++++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index bbe079db41..b991bd40aa 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -439,6 +439,7 @@ if($HIP_PLATFORM eq "hcc"){ if ($target_gfx900 eq 1) { $HIPLDFLAGS .= " --amdgpu-target=gfx900"; $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 "; + $ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } } diff --git a/hipamd/tests/src/deviceLib/hipTestHalf.cpp b/hipamd/tests/src/deviceLib/hipTestHalf.cpp index 94a3882eea..873dd23805 100644 --- a/hipamd/tests/src/deviceLib/hipTestHalf.cpp +++ b/hipamd/tests/src/deviceLib/hipTestHalf.cpp @@ -19,13 +19,14 @@ THE SOFTWARE. #include #include -#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "test_common.h" #define LEN 64 #define HALF_SIZE 64*sizeof(__half) #define HALF2_SIZE 64*sizeof(__half2) -#if __HIP_ARCH_GFX803__ > 0 +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX803__ __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) { int tx = hipThreadIdx_x; @@ -61,15 +62,21 @@ __global__ void __half2Math(hipLaunchParm lp, __half2 *A, __half2 *B, __half2 *C #endif int main(){ - __half *A, *B, *C; - hipMalloc(&A, HALF_SIZE); - hipMalloc(&B, HALF_SIZE); - hipMalloc(&C, HALF_SIZE); - hipLaunchKernel(__halfMath, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A, B, C); - __half2 *A2, *B2, *C2; - hipMalloc(&A, HALF2_SIZE); - hipMalloc(&B, HALF2_SIZE); - hipMalloc(&C, HALF2_SIZE); - hipLaunchKernel(__half2Math, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A2, B2, C2); - + __half *A, *B, *C; + hipMalloc(&A, HALF_SIZE); + hipMalloc(&B, HALF_SIZE); + hipMalloc(&C, HALF_SIZE); + hipLaunchKernel(__halfMath, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A, B, C); + hipFree(A); + hipFree(B); + hipFree(C); + __half2 *A2, *B2, *C2; + hipMalloc(&A2, HALF2_SIZE); + hipMalloc(&B2, HALF2_SIZE); + hipMalloc(&C2, HALF2_SIZE); + hipLaunchKernel(__half2Math, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A2, B2, C2); + hipFree(A2); + hipFree(B2); + hipFree(C2); + passed(); } From cfc9f8746564a70628007ad90ab37d63f6e2f7d7 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 16 Nov 2017 09:34:52 +0530 Subject: [PATCH 48/52] Fixed test case for GFX900 --- hipamd/tests/src/deviceLib/hipTestHalf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/deviceLib/hipTestHalf.cpp b/hipamd/tests/src/deviceLib/hipTestHalf.cpp index 873dd23805..f56538f080 100644 --- a/hipamd/tests/src/deviceLib/hipTestHalf.cpp +++ b/hipamd/tests/src/deviceLib/hipTestHalf.cpp @@ -26,7 +26,7 @@ THE SOFTWARE. #define HALF_SIZE 64*sizeof(__half) #define HALF2_SIZE 64*sizeof(__half2) -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX803__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) { int tx = hipThreadIdx_x; From 0cc921f103c9cdbcebe03898d3bee9b5411e8e80 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 19 Nov 2017 01:54:12 +0000 Subject: [PATCH 49/52] This implements the trivial change needed to move back from the hip{Something}_{x, y, z} macros to the natural CUDA syntax of Something.{x, y, z}. This is contained in lines 384-404 in hip_runtime.h. All of the other changes have to do with changing unit tests to use this syntax. The macros are retained for backwards compatibility. --- hipamd/include/hip/hcc_detail/hip_runtime.h | 21 ++++++ hipamd/src/device_util.cpp | 12 ++-- .../device/hipFuncDeviceSynchronize.cpp | 2 +- hipamd/tests/src/deviceLib/hipComplex.cpp | 2 +- .../tests/src/deviceLib/hipDeviceMemcpy.cpp | 6 +- hipamd/tests/src/deviceLib/hipFloatMath.cpp | 2 +- .../src/deviceLib/hipSimpleAtomicsTest.cpp | 4 +- hipamd/tests/src/deviceLib/hipTestDevice.cpp | 32 ++++----- .../src/deviceLib/hipTestDeviceDouble.cpp | 28 ++++---- .../src/deviceLib/hipTestDeviceSymbol.cpp | 2 +- hipamd/tests/src/deviceLib/hipTestHalf.cpp | 4 +- hipamd/tests/src/deviceLib/hipThreadFence.cpp | 2 +- hipamd/tests/src/deviceLib/hip_anyall.cpp | 8 +-- hipamd/tests/src/deviceLib/hip_ballot.cpp | 8 +-- hipamd/tests/src/deviceLib/hip_brev.cpp | 4 +- hipamd/tests/src/deviceLib/hip_clz.cpp | 4 +- hipamd/tests/src/deviceLib/hip_ffs.cpp | 4 +- hipamd/tests/src/deviceLib/hip_popc.cpp | 4 +- hipamd/tests/src/deviceLib/hip_test_ldg.cpp | 4 +- .../src/deviceLib/hip_test_make_type.cpp | 70 +++++++++---------- hipamd/tests/src/deviceLib/hip_trig.cpp | 2 +- hipamd/tests/src/experimental/xcompile/hHip.c | 2 +- .../src/experimental/xcompile/hipxxKer.cpp | 2 +- .../src/experimental/xcompile/hxxHip.cpp | 2 +- hipamd/tests/src/hipC.c | 2 +- hipamd/tests/src/hipC.cpp | 2 +- hipamd/tests/src/hipCKernel.c | 2 +- hipamd/tests/src/kernel/hipDynamicShared.cpp | 4 +- hipamd/tests/src/kernel/hipDynamicShared2.cpp | 2 +- hipamd/tests/src/kernel/hipGridLaunch.cpp | 7 +- .../src/kernel/hipLanguageExtensions.cpp | 8 +-- hipamd/tests/src/kernel/hipTestConstant.cpp | 4 +- .../tests/src/kernel/hipTestMallocKernel.cpp | 4 +- hipamd/tests/src/kernel/hipTestMemKernel.cpp | 20 +++--- hipamd/tests/src/kernel/inline_asm_vadd.cpp | 4 +- hipamd/tests/src/kernel/inline_asm_vmac.cpp | 4 +- hipamd/tests/src/kernel/launch_bounds.cpp | 2 +- .../device/hipDeviceSynchronize.cpp | 2 +- .../src/runtimeApi/memory/hipHostGetFlags.cpp | 2 +- .../src/runtimeApi/memory/hipHostMalloc.cpp | 26 +++---- .../src/runtimeApi/memory/hipHostRegister.cpp | 12 ++-- .../src/runtimeApi/memory/hipMemcpyAsync.cpp | 24 +++---- .../memory/hipMemoryAllocateCoherent.cpp | 2 +- .../runtimeApi/memory/p2p_copy_coherency.cpp | 12 ++-- .../tests/src/runtimeApi/module/hipModule.cpp | 2 +- .../src/runtimeApi/module/vcpy_kernel.cpp | 2 +- .../multiThread/hipMultiThreadStreams2.cpp | 6 +- .../runtimeApi/stream/hipAPIStreamDisable.cpp | 4 +- .../runtimeApi/stream/hipAPIStreamEnable.cpp | 4 +- .../src/runtimeApi/stream/hipNullStream.cpp | 4 +- .../tests/src/runtimeApi/stream/hipStream.h | 8 +-- .../synchronization/copy_coherency.cpp | 40 +++++------ .../synchronization/memcpyInt.device.cpp | 6 +- hipamd/tests/src/stress/hipStressAsync.cpp | 4 +- hipamd/tests/src/stress/hipStressChain.cpp | 2 +- hipamd/tests/src/stress/hipStressKernel.cpp | 2 +- hipamd/tests/src/stress/hipStressSync.cpp | 4 +- hipamd/tests/src/test_common.h | 46 ++++++------ hipamd/tests/src/texture/hipTextureObj2D.cpp | 4 +- hipamd/tests/src/texture/hipTextureRef2D.cpp | 4 +- 60 files changed, 270 insertions(+), 248 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index d3211ed3f5..924e774af0 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -381,6 +381,27 @@ __device__ void __threadfence_system(void) ; * @} */ +template::type f> +class Coordinates { + using R = decltype(f(0)); + + struct X { __device__ operator R() const { return f(0); } }; + struct Y { __device__ operator R() const { return f(1); } }; + struct Z { __device__ operator R() const { return f(2); } }; +public: + static constexpr X x{}; + static constexpr Y y{}; + static constexpr Z z{}; +}; + +static constexpr Coordinates blockDim; +static constexpr Coordinates blockIdx; +static constexpr Coordinates gridDim; +static constexpr Coordinates threadIdx; #define hipThreadIdx_x (hc_get_workitem_id(0)) #define hipThreadIdx_y (hc_get_workitem_id(1)) diff --git a/hipamd/src/device_util.cpp b/hipamd/src/device_util.cpp index 6afc797ec6..367a4c1a4f 100644 --- a/hipamd/src/device_util.cpp +++ b/hipamd/src/device_util.cpp @@ -45,8 +45,8 @@ __device__ void *__hip_hc_malloc(size_t size) { return (void*)nullptr; } - uint32_t totalThreads = hipBlockDim_x * hipGridDim_x * hipBlockDim_y * hipGridDim_y * hipBlockDim_z * hipGridDim_z; - uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x; + uint32_t totalThreads = blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z; + uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x; uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads; uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads; @@ -932,7 +932,7 @@ __device__ unsigned long long int atomicMax(unsigned long long int* address, template __device__ T atomicCAS_impl(T* address, T compare, T val) { - // the implementation assumes the atomic is lock-free and + // the implementation assumes the atomic is lock-free and // has the same size as the non-atmoic equivalent type static_assert(sizeof(T) == sizeof(std::atomic) , "size mismatch between atomic and non-atomic types"); @@ -945,7 +945,7 @@ __device__ T atomicCAS_impl(T* address, T compare, T val) T expected = compare; - // hcc should generate a system scope atomic CAS + // hcc should generate a system scope atomic CAS std::atomic_compare_exchange_weak_explicit(u.atomic_address , &expected, val , std::memory_order_acq_rel @@ -1110,8 +1110,8 @@ __device__ void* __get_dynamicgroupbaseptr() { return hc::get_dynamic_group_segment_base_pointer(); } -__host__ void* __get_dynamicgroupbaseptr() { - return nullptr; +__host__ void* __get_dynamicgroupbaseptr() { + return nullptr; } // Precise Math Functions diff --git a/hipamd/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp b/hipamd/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp index dac56bf709..c8c2e644c3 100644 --- a/hipamd/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp +++ b/hipamd/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #define NUM_STREAMS 2 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; // Kernel loop designed to execute very slowly... ... ... so we can test timing-related behavior below if(tx == 0){ for(int i = 0; i 0 __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) { - int tx = hipThreadIdx_x; + int tx = threadIdx.x; __half a = A[tx]; __half b = B[tx]; __half c = C[tx]; @@ -44,7 +44,7 @@ __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) { } __global__ void __half2Math(hipLaunchParm lp, __half2 *A, __half2 *B, __half2 *C) { - int tx = hipThreadIdx_x; + int tx = threadIdx.x; __half2 a = A[tx]; __half2 b = B[tx]; __half2 c = C[tx]; diff --git a/hipamd/tests/src/deviceLib/hipThreadFence.cpp b/hipamd/tests/src/deviceLib/hipThreadFence.cpp index 1bd9fbe02c..2f73b68529 100644 --- a/hipamd/tests/src/deviceLib/hipThreadFence.cpp +++ b/hipamd/tests/src/deviceLib/hipThreadFence.cpp @@ -33,7 +33,7 @@ THE SOFTWARE. __global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out) { - int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; In4[tid] = In1[tid] + In2[tid]; __threadfence(); In3[tid] = In3[tid] + In4[tid]; diff --git a/hipamd/tests/src/deviceLib/hip_anyall.cpp b/hipamd/tests/src/deviceLib/hip_anyall.cpp index bba7915052..f0b314ce8d 100644 --- a/hipamd/tests/src/deviceLib/hip_anyall.cpp +++ b/hipamd/tests/src/deviceLib/hip_anyall.cpp @@ -37,9 +37,9 @@ __global__ void warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift) { - int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; - device_any[hipThreadIdx_x>>pshift] = __any(tid -77); - device_all[hipThreadIdx_x>>pshift] = __all(tid -77); + int tid = threadIdx.x + blockIdx.x * blockDim.x; + device_any[threadIdx.x>>pshift] = __any(tid -77); + device_all[threadIdx.x>>pshift] = __all(tid -77); } int main(int argc, char *argv[]) @@ -49,7 +49,7 @@ int main(int argc, char *argv[]) warpSize = devProp.warpSize; int w = warpSize; - pshift = 0; + pshift = 0; while (w >>= 1) ++pshift; printf ("warpSize=%d pshift=%d\n", warpSize, pshift); diff --git a/hipamd/tests/src/deviceLib/hip_ballot.cpp b/hipamd/tests/src/deviceLib/hip_ballot.cpp index 742c47a065..14b8f314a1 100644 --- a/hipamd/tests/src/deviceLib/hip_ballot.cpp +++ b/hipamd/tests/src/deviceLib/hip_ballot.cpp @@ -34,12 +34,12 @@ __global__ void gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift) { - int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; - const unsigned int warp_num = hipThreadIdx_x >> pshift; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + const unsigned int warp_num = threadIdx.x >> pshift; #ifdef __HIP_PLATFORM_HCC__ - atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popcll(__ballot(tid - 245))); #else - atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); + atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popc(__ballot(tid - 245))); #endif } diff --git a/hipamd/tests/src/deviceLib/hip_brev.cpp b/hipamd/tests/src/deviceLib/hip_brev.cpp index 855a8bec47..c08c39dec9 100644 --- a/hipamd/tests/src/deviceLib/hip_brev.cpp +++ b/hipamd/tests/src/deviceLib/hip_brev.cpp @@ -72,8 +72,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { diff --git a/hipamd/tests/src/deviceLib/hip_clz.cpp b/hipamd/tests/src/deviceLib/hip_clz.cpp index bdb31f3e8d..53fd611184 100644 --- a/hipamd/tests/src/deviceLib/hip_clz.cpp +++ b/hipamd/tests/src/deviceLib/hip_clz.cpp @@ -83,8 +83,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { diff --git a/hipamd/tests/src/deviceLib/hip_ffs.cpp b/hipamd/tests/src/deviceLib/hip_ffs.cpp index c855ede060..49530bb298 100644 --- a/hipamd/tests/src/deviceLib/hip_ffs.cpp +++ b/hipamd/tests/src/deviceLib/hip_ffs.cpp @@ -73,8 +73,8 @@ HIP_kernel(hipLaunchParm lp, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { diff --git a/hipamd/tests/src/deviceLib/hip_popc.cpp b/hipamd/tests/src/deviceLib/hip_popc.cpp index e503e55b42..19dafb4d43 100644 --- a/hipamd/tests/src/deviceLib/hip_popc.cpp +++ b/hipamd/tests/src/deviceLib/hip_popc.cpp @@ -64,8 +64,8 @@ HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { diff --git a/hipamd/tests/src/deviceLib/hip_test_ldg.cpp b/hipamd/tests/src/deviceLib/hip_test_ldg.cpp index 171ff1afd0..63d50e881e 100644 --- a/hipamd/tests/src/deviceLib/hip_test_ldg.cpp +++ b/hipamd/tests/src/deviceLib/hip_test_ldg.cpp @@ -57,8 +57,8 @@ vectoradd_float(hipLaunchParm lp, T* a, const T* bm, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { diff --git a/hipamd/tests/src/deviceLib/hip_test_make_type.cpp b/hipamd/tests/src/deviceLib/hip_test_make_type.cpp index ef493ac923..6eba236e12 100644 --- a/hipamd/tests/src/deviceLib/hip_test_make_type.cpp +++ b/hipamd/tests/src/deviceLib/hip_test_make_type.cpp @@ -40,13 +40,13 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Z 1 -__global__ void +__global__ void vectoradd_char1(hipLaunchParm lp, - char1* a, const char1* bm, const char1* cm, int width, int height) + char1* a, const char1* bm, const char1* cm, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { @@ -54,40 +54,40 @@ vectoradd_char1(hipLaunchParm lp, } } -__global__ void +__global__ void vectoradd_char2(hipLaunchParm lp, - char2* a, const char2* bm, const char2* cm, int width, int height) + char2* a, const char2* bm, const char2* cm, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { a[i] = make_char2(bm[i].x, bm[i].y) + make_char2(cm[i].x, cm[i].y); } -} +} -__global__ void +__global__ void vectoradd_char3(hipLaunchParm lp, - char3* a, const char3* bm, const char3* cm, int width, int height) + char3* a, const char3* bm, const char3* cm, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { a[i] = make_char3(bm[i].x, bm[i].y, bm[i].z) + make_char3(cm[i].x, cm[i].y, cm[i].z); } } -__global__ void +__global__ void vectoradd_char4(hipLaunchParm lp, - char4* a, const char4* bm, const char4* cm, int width, int height) + char4* a, const char4* bm, const char4* cm, int width, int height) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; int i = y * width + x; if ( i < (width * height)) { @@ -99,7 +99,7 @@ vectoradd_char4(hipLaunchParm lp, #if 0 __kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { - + int x = blockDimX * blockIdx.x + threadIdx.x; int y = blockDimY * blockIdy.y + threadIdx.y; @@ -128,21 +128,21 @@ bool dataTypesRun(){ hostA = (T*)malloc(NUM * sizeof(T)); hostB = (T*)malloc(NUM * sizeof(T)); hostC = (T*)malloc(NUM * sizeof(T)); - + // initialize the input data for (i = 0; i < NUM; i++) { hostB[i] = (T)i; hostC[i] = (T)i; } - + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice)); HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice)); - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), + hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, @@ -192,21 +192,21 @@ bool dataTypesRun(){ hostA = (T*)malloc(NUM * sizeof(T)); hostB = (T*)malloc(NUM * sizeof(T)); hostC = (T*)malloc(NUM * sizeof(T)); - + // initialize the input data for (i = 0; i < NUM; i++) { hostB[i] = (T)i; hostC[i] = (T)i; } - + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice)); HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice)); - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), + hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, @@ -256,21 +256,21 @@ bool dataTypesRun(){ hostA = (T*)malloc(NUM * sizeof(T)); hostB = (T*)malloc(NUM * sizeof(T)); hostC = (T*)malloc(NUM * sizeof(T)); - + // initialize the input data for (i = 0; i < NUM; i++) { hostB[i] = (T)i; hostC[i] = (T)i; } - + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice)); HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice)); - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), + hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, @@ -319,21 +319,21 @@ bool dataTypesRunChar4(){ hostA = (T*)malloc(NUM * sizeof(T)); hostB = (T*)malloc(NUM * sizeof(T)); hostC = (T*)malloc(NUM * sizeof(T)); - + // initialize the input data for (i = 0; i < NUM; i++) { hostB[i] = (T)i; hostC[i] = (T)i; } - + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice)); HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice)); - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), + hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, @@ -368,7 +368,7 @@ bool dataTypesRunChar4(){ } int main() { - + hipDeviceProp_t devProp; hipGetDeviceProperties(&devProp, 0); cout << " System minor " << devProp.minor << endl; diff --git a/hipamd/tests/src/deviceLib/hip_trig.cpp b/hipamd/tests/src/deviceLib/hip_trig.cpp index 5ec28101f3..6ee8dc58ad 100644 --- a/hipamd/tests/src/deviceLib/hip_trig.cpp +++ b/hipamd/tests/src/deviceLib/hip_trig.cpp @@ -36,7 +36,7 @@ THE SOFTWARE. #define SIZE LEN<<2 __global__ void kernel_trig(hipLaunchParm lp, float *In, float *sin_d, float *cos_d, float *tan_d, float *sin_pd, float *cos_pd){ - int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; sin_d[tid] = __sinf(In[tid]); cos_d[tid] = __cosf(In[tid]); tan_d[tid] = __tanf(In[tid]); diff --git a/hipamd/tests/src/experimental/xcompile/hHip.c b/hipamd/tests/src/experimental/xcompile/hHip.c index 2ac4ebc73e..17e7e9ecf6 100644 --- a/hipamd/tests/src/experimental/xcompile/hHip.c +++ b/hipamd/tests/src/experimental/xcompile/hHip.c @@ -29,7 +29,7 @@ THE SOFTWARE. __global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len) { - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; if(tx < len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp b/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp index d1bbed63cd..5dca6c1bca 100644 --- a/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp +++ b/hipamd/tests/src/experimental/xcompile/hipxxKer.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. __global__ void Kern(hipLaunchParm lp, float *A) { - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; A[tx] += 1.0f; } diff --git a/hipamd/tests/src/experimental/xcompile/hxxHip.cpp b/hipamd/tests/src/experimental/xcompile/hxxHip.cpp index 6a748d5c89..bca5d64afc 100644 --- a/hipamd/tests/src/experimental/xcompile/hxxHip.cpp +++ b/hipamd/tests/src/experimental/xcompile/hxxHip.cpp @@ -33,7 +33,7 @@ class memManager; template __global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len) { - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; if(tx < Len) { Cd[tx] = Ad[tx] + Bd[tx]; diff --git a/hipamd/tests/src/hipC.c b/hipamd/tests/src/hipC.c index 644df6c98f..efa03bb909 100644 --- a/hipamd/tests/src/hipC.c +++ b/hipamd/tests/src/hipC.c @@ -34,7 +34,7 @@ THE SOFTWARE. #define SIZE 1024*1024*sizeof(int) __global__ void Iter(hipLaunchParm lp, int *Ad){ - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; if(tx == 0){ for(int i=0;i(my_sdata); #endif - size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - size_t tid = hipThreadIdx_x; + size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); + size_t tid = threadIdx.x; // initialize dynamic shared memory if (tid < groupElements) { diff --git a/hipamd/tests/src/kernel/hipDynamicShared2.cpp b/hipamd/tests/src/kernel/hipDynamicShared2.cpp index 95e70a9956..4567ff6fc2 100644 --- a/hipamd/tests/src/kernel/hipDynamicShared2.cpp +++ b/hipamd/tests/src/kernel/hipDynamicShared2.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. __global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) { HIP_DYNAMIC_SHARED(float, sBd); - int tx = hipThreadIdx_x; + int tx = threadIdx.x; for(int i=0;i syncMsg = {"event", "stream", "device"}; void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMethod, std::string msg) { - std::cerr << "test: CheckHostPointer " << msg + std::cerr << "test: CheckHostPointer " << msg //<< " HIP_COHERENT_HOST_ALLOC=" << HIP_COHERENT_HOST_ALLOC //<< " HIP_EVENT_SYS_RELEASE=" << HIP_EVENT_SYS_RELEASE - << " eventFlags = " << std::hex << eventFlags - << ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "") - << ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "") - << " ptr=" << ptr + << " eventFlags = " << std::hex << eventFlags + << ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "") + << ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "") + << " ptr=" << ptr << " syncMethod=" << syncMsg[syncMethod] << "\n"; hipStream_t s; @@ -93,7 +93,7 @@ void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMe default: assert(0); }; - + for (int i=0; i __global__ void Inc(hipLaunchParm lp, float *Ad){ - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; Ad[tx] = Ad[tx] + float(1); } template -void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internalRegister) +void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internalRegister) { A = A + offset; numElements -= offset; @@ -56,7 +56,7 @@ void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internal HIPCHECK(hipMemset(Bd, 13.0f, sizeBytes)); - // + // HIPCHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice)); HIPCHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost)); @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) const size_t size = N * sizeof(float); - if (p_tests & 0x1) { + if (p_tests & 0x1) { float *A, **Ad; int num_devices; HIPCHECK(hipGetDeviceCount(&num_devices)); @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) } - if (p_tests & 0x6) { + if (p_tests & 0x6) { // Sensitize HIP bug if device does not match where the memory was registered. HIPCHECK(hipSetDevice(0)); @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) HIPCHECK(hipMalloc(&Bd, size)); // TODO - set to 128 -#define OFFSETS_TO_TRY 128 +#define OFFSETS_TO_TRY 128 assert (N>OFFSETS_TO_TRY); if (p_tests & 0x2) { diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp index 22bd30689a..5cd46c808a 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp @@ -59,7 +59,7 @@ struct HostTraits static const char *Name() { return "Pinned"; } ; static void *Alloc(size_t sizeBytes) { - void *p; + void *p; HIPCHECK(hipHostMalloc((void**)&p, sizeBytes, hipHostMallocDefault)); return p; }; @@ -67,11 +67,11 @@ struct HostTraits template -__global__ void +__global__ void addK (hipLaunchParm lp, T *A, T K, size_t numElements) { - size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - size_t stride = hipBlockDim_x * hipGridDim_x ; + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; for (size_t i=offset; i -void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide) +void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide) { HIPASSERT(numElements % numInflight == 0); // Must be evenly divisible. size_t Nbytes = numElements*sizeof(T); @@ -95,7 +95,7 @@ void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); printf ("-----------------------------------------------------------------------------------------------\n"); - printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n", + printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n", __func__, HostTraits::Name(), Nbytes, (double)(Nbytes)/1024.0/1024.0, numPongs, numInflight, eachCopyElements, eachCopyBytes); T *A_h = NULL; @@ -176,7 +176,7 @@ void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies, size_t eachCopyBytes = eachCopyElements * sizeof(T); printf ("-----------------------------------------------------------------------------------------------\n"); - printf ("testing: %s Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n", + printf ("testing: %s Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n", __func__, Nbytes, (double)(Nbytes)/1024.0/1024.0, numCopies, eachCopyElements, eachCopyBytes); T *A_d; @@ -194,7 +194,7 @@ void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies, //stream=0; // fixme TODO - for (int i=0; i=0; i--) { diff --git a/hipamd/tests/src/runtimeApi/module/hipModule.cpp b/hipamd/tests/src/runtimeApi/module/hipModule.cpp index 1b7b62cff2..f2c2137738 100644 --- a/hipamd/tests/src/runtimeApi/module/hipModule.cpp +++ b/hipamd/tests/src/runtimeApi/module/hipModule.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. #define kernel_name "hello_world" __global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){ - int tx = hipThreadIdx_x; + int tx = threadIdx.x; Bd[tx] = Ad[tx]; } diff --git a/hipamd/tests/src/runtimeApi/module/vcpy_kernel.cpp b/hipamd/tests/src/runtimeApi/module/vcpy_kernel.cpp index 0375eee342..7ee1ad333b 100644 --- a/hipamd/tests/src/runtimeApi/module/vcpy_kernel.cpp +++ b/hipamd/tests/src/runtimeApi/module/vcpy_kernel.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b) { - int tx = hipThreadIdx_x; + int tx = threadIdx.x; b[tx] = a[tx]; } diff --git a/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp b/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp index 43a3e9bdea..3727901645 100644 --- a/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp +++ b/hipamd/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp @@ -35,7 +35,7 @@ THE SOFTWARE. template __global__ void Inc(hipLaunchParm lp, T *Array){ -int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +int tx = threadIdx.x + blockIdx.x * blockDim.x; Array[tx] = Array[tx] + T(1); } @@ -116,7 +116,7 @@ int main(int argc, char **argv) } const size_t size = N * sizeof(float); - + for (int i=0; i< iterations; i++) { std::thread t1(run1, size, stream[0]); @@ -126,7 +126,7 @@ int main(int argc, char **argv) // std::cout<<"T1"< __global__ void Inc(hipLaunchParm lp, T *In){ -int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +int tx = threadIdx.x + blockIdx.x * blockDim.x; In[tx] = In[tx] + 1; } @@ -94,12 +94,12 @@ void initArrays(T **Ad, T **Ah, } template -void initArrays(T **Ad, size_t N, - bool deviceMemory = false, +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, bool usePinnedHost = false){ size_t NBytes = N * sizeof(T); if(deviceMemory){ - HIPCHECK( hipMalloc(Ad, NBytes)); + HIPCHECK( hipMalloc(Ad, NBytes)); }else{ if(usePinnedHost){ HIPCHECK(hipHostMalloc((void**)Ad, NBytes, hipHostMallocDefault)); diff --git a/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp index 2e514e1e3a..b2a66f61e2 100644 --- a/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp +++ b/hipamd/tests/src/runtimeApi/synchronization/copy_coherency.cpp @@ -27,14 +27,14 @@ THE SOFTWARE. // TODO - add code object support here. /* HIT_START * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * RUN: %t + * RUN: %t * HIT_END */ // Test cache management (fences) and synchronization between kernel and copy commands. -// Exhaustively tests 3 command types (copy, kernel, module kernel), -// many sync types (see SyncType), followed by another command, across a sweep +// Exhaustively tests 3 command types (copy, kernel, module kernel), +// many sync types (see SyncType), followed by another command, across a sweep // of data sizes designed to stress various levels of the memory hierarchy. #include "hip/hip_runtime.h" @@ -102,8 +102,8 @@ MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); __global__ void memsetIntKernel(int * ptr, const int val, size_t numElements) { - int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - int stride = hipBlockDim_x * hipGridDim_x ; + int gid = (blockIdx.x * blockDim.x + threadIdx.x); + int stride = blockDim.x * gridDim.x ; for (size_t i= gid; i< numElements; i+=stride){ ptr[i] = val; } @@ -112,15 +112,15 @@ memsetIntKernel(int * ptr, const int val, size_t numElements) __global__ void memcpyIntKernel(int *dst, const int * src, size_t numElements) { - int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - int stride = hipBlockDim_x * hipGridDim_x ; + int gid = (blockIdx.x * blockDim.x + threadIdx.x); + int stride = blockDim.x * gridDim.x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } }; -// CHeck arrays in reverse order, to more easily detect cases where +// CHeck arrays in reverse order, to more easily detect cases where // the copy is "partially" done. void checkReverse(const int *ptr, int numElements, int expected) { int mismatchCnt = 0; @@ -157,7 +157,7 @@ const char * CmdTypeStr(CmdType c) ENUM_CASE_STR(MODULE_KERNEL); default: return "UNKNOWN"; }; -} +} enum SyncType { @@ -223,16 +223,16 @@ void resetInputs( int * Ad, int * Bd, int *Cd, int *Ch, size_t numElements, int // Intended to test proper synchronization and cache flushing between CMDA and CMDB. // CMD are of type CmdType. All command copy memory, using either hipMemcpyAsync or kernel implementations. -// CmdA copies from Ad to Bd, +// CmdA copies from Ad to Bd, // Some form of synchronization is applied. // Then cmdB copies from Bd to Cd. // // Cd is then copied to host Ch using a memory copy. // // Correct result at the end is that Ch contains the contents originally in Ad (integer 0x42) -void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, +void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, hipStream_t stream1, hipStream_t stream2, int numElements, - int * Ad, int * Bd, int *Cd, int *Ch, + int * Ad, int * Bd, int *Cd, int *Ch, int expected) { hipEvent_t e; @@ -241,14 +241,14 @@ void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, resetInputs(Ad, Bd, Cd, Ch, numElements, expected); const size_t sizeElements = numElements * sizeof(int); - fprintf (stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", + fprintf (stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", sizeElements, (double) (sizeElements/1024.0), CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType)); if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { fprintf (stderr, "warn: skipping since test infra does not yet support modules\n"); return; } - + // Step A: runCmd(cmdAType, Bd, Ad, stream1, numElements); @@ -334,7 +334,7 @@ void testWrapper(size_t numElements) fprintf (stderr, "test: init complete, start running tests\n"); - runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); + runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); for (int cmdA=0; cmdA - + extern "C" __global__ void memcpyIntKernel(hipLaunchParm lp, int *dst, const int * src, size_t numElements) { - int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - int stride = hipBlockDim_x * hipGridDim_x ; + int gid = (blockIdx.x * blockDim.x + threadIdx.x); + int stride = blockDim.x * gridDim.x ; for (size_t i= gid; i< numElements; i+=stride){ dst[i] = src[i]; } diff --git a/hipamd/tests/src/stress/hipStressAsync.cpp b/hipamd/tests/src/stress/hipStressAsync.cpp index e06e16809c..a142b41730 100644 --- a/hipamd/tests/src/stress/hipStressAsync.cpp +++ b/hipamd/tests/src/stress/hipStressAsync.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. #define ITER 1<<10 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ - int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int tx = threadIdx.x + blockIdx.x * blockDim.x; if(tx == 0){ for(int i = 0; i=0; i-=stride) { C_d[i] = A_d[i] + B_d[i]; @@ -169,8 +169,8 @@ addCount( const T *A_d, size_t NELEM, int count) { - size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - size_t stride = hipBlockDim_x * hipGridDim_x ; + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; // Deliberately do this in an inefficient way to increase kernel runtime for (int i=0; i=0; i-=stride) { C_d[i] = val; @@ -219,12 +219,12 @@ void setDefaultData(size_t numElements, T *A_h, T* B_h, T *C_h) { // Initialize the host data: for (size_t i=0; i void initArrays(T **A_d, T **B_d, T **C_d, - T **A_h, T **B_h, T **C_h, - size_t N, bool usePinnedHost=false) + T **A_h, T **B_h, T **C_h, + size_t N, bool usePinnedHost=false) { size_t Nbytes = N*sizeof(T); @@ -317,7 +317,7 @@ void freeArraysForHost(T *A_h, T *B_h, T *C_h, bool usePinnedHost) template void freeArrays(T *A_d, T *B_d, T *C_d, - T *A_h, T *B_h, T *C_h, bool usePinnedHost) + T *A_h, T *B_h, T *C_h, bool usePinnedHost) { if (A_d) { HIPCHECK ( hipFree(A_d) ); @@ -453,9 +453,9 @@ struct Pinned { static const bool isPinned = true; static const char *str() { return "Pinned"; }; - static void *Alloc(size_t sizeBytes) + static void *Alloc(size_t sizeBytes) { - void *p; + void *p; HIPCHECK(hipHostMalloc((void**)&p, sizeBytes)); return p; }; @@ -463,12 +463,12 @@ struct Pinned { //--- -struct Unpinned +struct Unpinned { static const bool isPinned = false; static const char *str() { return "Unpinned"; }; - static void *Alloc(size_t sizeBytes) + static void *Alloc(size_t sizeBytes) { void *p = malloc (sizeBytes); HIPASSERT(p); @@ -496,7 +496,7 @@ template<> struct MemTraits { - static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind)); } @@ -507,7 +507,7 @@ template<> struct MemTraits { - static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream)); } diff --git a/hipamd/tests/src/texture/hipTextureObj2D.cpp b/hipamd/tests/src/texture/hipTextureObj2D.cpp index 443d708418..9ddafd6b1c 100644 --- a/hipamd/tests/src/texture/hipTextureObj2D.cpp +++ b/hipamd/tests/src/texture/hipTextureObj2D.cpp @@ -17,8 +17,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; - int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; + int x = blockIdx.x*blockDim.x + threadIdx.x; + int y = blockIdx.y*blockDim.y + threadIdx.y; outputData[y*width + x] = tex2D(textureObject, x, y); } diff --git a/hipamd/tests/src/texture/hipTextureRef2D.cpp b/hipamd/tests/src/texture/hipTextureRef2D.cpp index eb27b23230..4430ca722d 100644 --- a/hipamd/tests/src/texture/hipTextureRef2D.cpp +++ b/hipamd/tests/src/texture/hipTextureRef2D.cpp @@ -18,8 +18,8 @@ __global__ void tex2DKernel(float* outputData, int width, int height) { - int x = hipBlockIdx_x*hipBlockDim_x + hipThreadIdx_x; - int y = hipBlockIdx_y*hipBlockDim_y + hipThreadIdx_y; + int x = blockIdx.x*blockDim.x + threadIdx.x; + int y = blockIdx.y*blockDim.y + threadIdx.y; #ifdef __HIP_PLATFORM_HCC__ outputData[y*width + x] = tex2D(tex, textureObject, x, y); #else From fc34fd6f03813ff9ce648f98a5c7b4891a71ad52 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 19 Nov 2017 15:21:37 -0600 Subject: [PATCH 50/52] Temporarily disable P2P on nvidia (fails on dual GPU) --- hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp index 9d46ccb0d8..b01a0aeb1d 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */ From aeadc3f18ff70472360c752e18774c542e4fa1c5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 19 Nov 2017 15:31:02 -0600 Subject: [PATCH 51/52] Fix test on cuda --- hipamd/tests/src/kernel/hipGridLaunch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/kernel/hipGridLaunch.cpp b/hipamd/tests/src/kernel/hipGridLaunch.cpp index f2cea64492..dff6527e2c 100644 --- a/hipamd/tests/src/kernel/hipGridLaunch.cpp +++ b/hipamd/tests/src/kernel/hipGridLaunch.cpp @@ -52,7 +52,7 @@ vectorADD2( hipLaunchParm lp, size_t stride = blockDim.x * gridDim.x ; for (size_t i=offset; i Date: Thu, 23 Nov 2017 09:57:24 +0530 Subject: [PATCH 52/52] Fix float2int rounding functions Change-Id: I67943859a6344c5eec0eaa23418c9b802ef72468 --- hipamd/src/device_functions.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/hipamd/src/device_functions.cpp b/hipamd/src/device_functions.cpp index 615ae4d0b7..3c9bf334fa 100644 --- a/hipamd/src/device_functions.cpp +++ b/hipamd/src/device_functions.cpp @@ -23,6 +23,11 @@ THE SOFTWARE. #include #include "device_util.h" +extern "C" float __ocml_floor_f32(float); +extern "C" float __ocml_rint_f32(float); +extern "C" float __ocml_ceil_f32(float); +extern "C" float __ocml_trunc_f32(float); + struct holder64Bit{ union{ double d; @@ -151,19 +156,19 @@ __device__ long long int __double_as_longlong(double x) __device__ int __float2int_rd(float x) { - return (int)x; + return (int)__ocml_floor_f32(x); } __device__ int __float2int_rn(float x) { - return (int)x; + return (int)__ocml_rint_f32(x); } __device__ int __float2int_ru(float x) { - return (int)x; + return (int)__ocml_ceil_f32(x); } __device__ int __float2int_rz(float x) { - return (int)x; + return (int)__ocml_trunc_f32(x); } __device__ long long int __float2ll_rd(float x)