Merge branch 'memtracker' into privatestaging
Conflicts: include/nvcc_detail/hip_runtime_api.h
This commit is contained in:
@@ -43,6 +43,11 @@ THE SOFTWARE.
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
|
||||
#include <hip_common.h>
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) and not defined (__HIP_PLATFORM_NVCC__)
|
||||
|
||||
@@ -242,12 +242,10 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
cdattr = cudaDevAttrMaxRegistersPerBlock; break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
cdattr = cudaDevAttrClockRate; break;
|
||||
#ifdef USE_ROCR_20
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
cdattr = cudaDevAttrMemoryClockRate; break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
cdattr = cudaDevAttrGlobalMemoryBusWidth; break;
|
||||
#endif
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
cdattr = cudaDevAttrMultiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
|
||||
@@ -1227,6 +1227,8 @@ hipError_t hipDeviceReset(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO - reset all streams on the device.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,10 @@ if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
|
||||
elseif (${HIP_PLATFORM} STREQUAL "nvcc")
|
||||
MESSAGE ("HIP_PLATFORM=nvcc")
|
||||
|
||||
#Need C++11 for threads in some of the tests.
|
||||
add_definitions(-std=c++11)
|
||||
|
||||
# NVCC does not not support -rdynamic option
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS )
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS )
|
||||
@@ -118,6 +122,7 @@ make_hip_executable (hipMathFunctionsHost hipMathFunctions.cpp hipSinglePrecisio
|
||||
make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecisionMathDevice.cpp hipDoublePrecisionMathDevice.cpp)
|
||||
make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp)
|
||||
make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp)
|
||||
make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp)
|
||||
target_link_libraries(hipMathFunctionsHost m)
|
||||
|
||||
make_test(hip_ballot " " )
|
||||
@@ -133,6 +138,7 @@ make_test(hipMemset --N 10013 --memsetval 0x5a ) # oddball size.
|
||||
make_test(hipMemset --N 256M --memsetval 0xa6 ) # big copy
|
||||
make_test(hipGridLaunch " " )
|
||||
make_test(hipPointerAttrib " " )
|
||||
make_test(hipMultiThreadStreams " " )
|
||||
|
||||
make_test(hipMemcpy " " )
|
||||
make_test(hipMemcpyAsync " " )
|
||||
|
||||
@@ -63,12 +63,6 @@ void simpleTest1()
|
||||
}
|
||||
|
||||
|
||||
class hipMemcpy;
|
||||
class hipMemcpyAsync;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//---
|
||||
// Test many different kinds of memory copies.
|
||||
@@ -86,7 +80,7 @@ void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, boo
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n",
|
||||
__func__,
|
||||
typeid(T).name(),
|
||||
TYPENAME(T),
|
||||
sizeElements, sizeElements/1024.0/1024.0,
|
||||
usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault);
|
||||
|
||||
@@ -176,7 +170,7 @@ template<typename T>
|
||||
void memcpytest2_sizes(size_t maxElem=0, size_t offset=0)
|
||||
{
|
||||
printSep();
|
||||
printf ("test: %s<%s>\n", __func__, typeid(T).name());
|
||||
printf ("test: %s<%s>\n", __func__, TYPENAME(T));
|
||||
|
||||
int deviceId;
|
||||
HIPCHECK(hipGetDevice(&deviceId));
|
||||
@@ -206,7 +200,7 @@ template<typename T>
|
||||
void multiThread_1(bool serialize, bool usePinnedHost)
|
||||
{
|
||||
printSep();
|
||||
printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, typeid(T).name(), serialize, usePinnedHost);
|
||||
printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost);
|
||||
std::thread t1 (memcpytest2<T>,N, usePinnedHost,0,0,0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
|
||||
|
||||
void printSep()
|
||||
{
|
||||
printf ("======================================================================================\n");
|
||||
}
|
||||
|
||||
//---
|
||||
// Test simple H2D copies and back.
|
||||
// Designed to stress a small number of simple smoke tests
|
||||
|
||||
template<
|
||||
typename T=float,
|
||||
class P=HipTest::Unpinned,
|
||||
class C=HipTest::Memcpy
|
||||
>
|
||||
void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
|
||||
{
|
||||
using HipTest::MemTraits;
|
||||
|
||||
std::thread::id pid = std::this_thread::get_id();
|
||||
|
||||
printf ("test: %s <%s> %s %s\n", __func__, TYPENAME(T), P::str(), C::str());
|
||||
size_t Nbytes = numElements*sizeof(T);
|
||||
printf ("numElements=%zu Nbytes=%6.2fMB\n", numElements, Nbytes/1024.0/1024.0);
|
||||
|
||||
T *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned);
|
||||
|
||||
|
||||
for (int i=0; i<iters; i++) {
|
||||
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
|
||||
|
||||
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
|
||||
|
||||
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
|
||||
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, numElements);
|
||||
}
|
||||
|
||||
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned);
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
std::cout <<" pid" << pid << " success\n";
|
||||
}
|
||||
|
||||
template<typename T, class C>
|
||||
void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t stream1, bool serialize)
|
||||
{
|
||||
printSep();
|
||||
printf ("%s\n", __func__);
|
||||
std::cout << testName << std::endl;
|
||||
|
||||
// Test 2 threads operating on same stream:
|
||||
std::thread t1 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 1000, stream0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
}
|
||||
std::thread t2 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 1000, stream1);
|
||||
if (serialize) {
|
||||
t2.join();
|
||||
}
|
||||
|
||||
if (!serialize) {
|
||||
t1.join();
|
||||
t2.join();
|
||||
}
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
};
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
printf ("info: set device to %d\n", p_gpuDevice);
|
||||
HIPCHECK(hipSetDevice(p_gpuDevice));
|
||||
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK (hipStreamCreate(&stream));
|
||||
|
||||
simpleVectorCopy<float, HipTest::Pinned, HipTest::MemcpyAsync> (2000000/*mb*/, 10/*iters*/, stream);
|
||||
simpleVectorCopy<float, HipTest::Pinned, HipTest::Memcpy> (2000000/*mb*/, 10/*iters*/, stream);
|
||||
|
||||
//HIPCHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
hipStream_t stream0, stream1;
|
||||
HIPCHECK (hipStreamCreate(&stream0));
|
||||
HIPCHECK (hipStreamCreate(&stream1));
|
||||
|
||||
// Easy tests to verify the test works - these don't allow overlap between the threads:
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread NULL with serialized", NULL, NULL, true);
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with serialized", stream0, stream1, true);
|
||||
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with one stream", stream0, stream0, false);
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
@@ -16,6 +16,16 @@
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC
|
||||
#define TYPENAME(T) typeid(T).name()
|
||||
#else
|
||||
#define TYPENAME(T) "?"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define passed() \
|
||||
printf ("%sPASSED!%s\n",KGRN, KNRM);\
|
||||
exit(0);
|
||||
@@ -82,12 +92,12 @@ vectorADD(hipLaunchParm lp,
|
||||
const T *A_d,
|
||||
const T *B_d,
|
||||
T *C_d,
|
||||
size_t N)
|
||||
size_t NELEM)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
|
||||
for (size_t i=offset; i<N; i+=stride) {
|
||||
for (size_t i=offset; i<NELEM; i+=stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
@@ -220,4 +230,70 @@ void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true
|
||||
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
struct Pinned {
|
||||
static const bool isPinned = true;
|
||||
static const char *str() { return "Pinned"; };
|
||||
|
||||
static void *Alloc(size_t sizeBytes)
|
||||
{
|
||||
void *p;
|
||||
HIPCHECK(hipMallocHost(&p, sizeBytes));
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
struct Unpinned
|
||||
{
|
||||
static const bool isPinned = false;
|
||||
static const char *str() { return "Unpinned"; };
|
||||
|
||||
static void *Alloc(size_t sizeBytes)
|
||||
{
|
||||
void *p = malloc (sizeBytes);
|
||||
HIPASSERT(p);
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct Memcpy
|
||||
{
|
||||
static const char *str() { return "Memcpy"; };
|
||||
};
|
||||
|
||||
struct MemcpyAsync
|
||||
{
|
||||
static const char *str() { return "MemcpyAsync"; };
|
||||
};
|
||||
|
||||
|
||||
template <typename C> struct MemTraits;
|
||||
|
||||
|
||||
template<>
|
||||
struct MemTraits<Memcpy>
|
||||
{
|
||||
|
||||
static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct MemTraits<MemcpyAsync>
|
||||
{
|
||||
|
||||
static void Copy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
|
||||
}
|
||||
};
|
||||
|
||||
}; // namespace HipTest
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user