diff --git a/include/hip_runtime.h b/include/hip_runtime.h index 59d3d6c4c9..0594726c90 100644 --- a/include/hip_runtime.h +++ b/include/hip_runtime.h @@ -43,6 +43,11 @@ THE SOFTWARE. #include #include +#ifdef __cplusplus +#include +#endif + + #include #if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__) diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index b4d9b06ad6..d0050a3734 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -242,12 +242,10 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att cdattr = cudaDevAttrMaxRegistersPerBlock; break; case hipDeviceAttributeClockRate: cdattr = cudaDevAttrClockRate; break; -#ifdef USE_ROCR_20 case hipDeviceAttributeMemoryClockRate: cdattr = cudaDevAttrMemoryClockRate; break; case hipDeviceAttributeMemoryBusWidth: cdattr = cudaDevAttrGlobalMemoryBusWidth; break; -#endif case hipDeviceAttributeMultiprocessorCount: cdattr = cudaDevAttrMultiProcessorCount; break; case hipDeviceAttributeComputeMode: diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 282b363a95..912dec77de 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1227,6 +1227,8 @@ hipError_t hipDeviceReset(void) } #endif + // TODO - reset all streams on the device. + return ihipLogStatus(hipSuccess); } diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 09c0ca7162..cc6af0b5d2 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -41,6 +41,10 @@ if (${HIP_PLATFORM} STREQUAL "hcc") elseif (${HIP_PLATFORM} STREQUAL "nvcc") MESSAGE ("HIP_PLATFORM=nvcc") + + #Need C++11 for threads in some of the tests. + add_definitions(-std=c++11) + # NVCC does not not support -rdynamic option set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS ) set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ) @@ -118,6 +122,7 @@ make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisio make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp) make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) +make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -133,6 +138,7 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size. make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy make_test(hipGridLaunch " " ) make_test(hipPointerAttrib " " ) +make_test(hipMultiThreadStreams " " ) make_test(hipMemcpy " " ) make_test(hipMemcpyAsync " " ) diff --git a/tests/src/hipMemcpy.cpp b/tests/src/hipMemcpy.cpp index 3fa499aa2a..b2bfc63fe5 100644 --- a/tests/src/hipMemcpy.cpp +++ b/tests/src/hipMemcpy.cpp @@ -63,12 +63,6 @@ void simpleTest1() } -class hipMemcpy; -class hipMemcpyAsync; - - - - //--- // Test many different kinds of memory copies. @@ -86,7 +80,7 @@ void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, boo size_t sizeElements = numElements * sizeof(T); printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", __func__, - typeid(T).name(), + TYPENAME(T), sizeElements, sizeElements/1024.0/1024.0, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); @@ -176,7 +170,7 @@ template void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) { printSep(); - printf ("test: %s<%s>\n", __func__, typeid(T).name()); + printf ("test: %s<%s>\n", __func__, TYPENAME(T)); int deviceId; HIPCHECK(hipGetDevice(&deviceId)); @@ -206,7 +200,7 @@ template void multiThread_1(bool serialize, bool usePinnedHost) { printSep(); - printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, typeid(T).name(), serialize, usePinnedHost); + printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); std::thread t1 (memcpytest2,N, usePinnedHost,0,0,0); if (serialize) { t1.join(); diff --git a/tests/src/hipMultiThreadStreams.cpp b/tests/src/hipMultiThreadStreams.cpp new file mode 100644 index 0000000000..a3dd94e077 --- /dev/null +++ b/tests/src/hipMultiThreadStreams.cpp @@ -0,0 +1,142 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip_runtime.h" +#include "test_common.h" + + + +void printSep() +{ + printf ("======================================================================================\n"); +} + +//--- +// Test simple H2D copies and back. +// Designed to stress a small number of simple smoke tests + +template< + typename T=float, + class P=HipTest::Unpinned, + class C=HipTest::Memcpy +> +void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream) +{ + using HipTest::MemTraits; + + std::thread::id pid = std::this_thread::get_id(); + + printf ("test: %s <%s> %s %s\n", __func__, TYPENAME(T), P::str(), C::str()); + size_t Nbytes = numElements*sizeof(T); + printf ("numElements=%zu Nbytes=%6.2fMB\n", numElements, Nbytes/1024.0/1024.0); + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned); + + + for (int i=0; i::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + + MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); + + HIPCHECK (hipDeviceSynchronize()); + + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + } + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned); + HIPCHECK (hipDeviceSynchronize()); + + std::cout <<" pid" << pid << " success\n"; +} + +template +void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t stream1, bool serialize) +{ + printSep(); + printf ("%s\n", __func__); + std::cout << testName << std::endl; + + // Test 2 threads operating on same stream: + std::thread t1 (simpleVectorCopy, 2000000/*mb*/, 1000, stream0); + if (serialize) { + t1.join(); + } + std::thread t2 (simpleVectorCopy, 2000000/*mb*/, 1000, stream1); + if (serialize) { + t2.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + } + + HIPCHECK(hipDeviceSynchronize()); +}; + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + + if (p_tests & 0x1) { + HIPCHECK ( hipDeviceReset() ); + + hipStream_t stream; + HIPCHECK (hipStreamCreate(&stream)); + + simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); + simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); + + //HIPCHECK(hipStreamDestroy(stream)); + } + + + if (p_tests & 0x2) { + hipStream_t stream0, stream1; + HIPCHECK (hipStreamCreate(&stream0)); + HIPCHECK (hipStreamCreate(&stream1)); + + // Easy tests to verify the test works - these don't allow overlap between the threads: + test_multiThread_1 ("Multithread NULL with serialized", NULL, NULL, true); + test_multiThread_1 ("Multithread with serialized", stream0, stream1, true); + + test_multiThread_1 ("Multithread with NULL stream", NULL, NULL, false); + test_multiThread_1 ("Multithread with two streams", stream0, stream1, false); + test_multiThread_1 ("Multithread with one stream", stream0, stream0, false); + } + + passed(); + +} diff --git a/tests/src/test_common.h b/tests/src/test_common.h index 1bf89f1604..e37eec7e86 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -16,6 +16,16 @@ #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" + + +#ifdef __HIP_PLATFORM_HCC +#define TYPENAME(T) typeid(T).name() +#else +#define TYPENAME(T) "?" +#endif + + + #define passed() \ printf ("%sPASSED!%s\n",KGRN, KNRM);\ exit(0); @@ -82,12 +92,12 @@ vectorADD(hipLaunchParm lp, const T *A_d, const T *B_d, T *C_d, - size_t N) + size_t NELEM) { size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); size_t stride = hipBlockDim_x * hipGridDim_x ; - for (size_t i=offset; i struct MemTraits; + + +template<> +struct MemTraits +{ + + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + { + HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind)); + } +}; + + +template<> +struct MemTraits +{ + + static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) + { + HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream)); + } +}; + }; // namespace HipTest