Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging

Šī revīzija ir iekļauta:
pensun
2016-02-27 04:25:28 -06:00
revīzija 07e56d4666
6 mainīti faili ar 78 papildinājumiem un 46 dzēšanām
+1 -1
Parādīt failu
@@ -168,7 +168,7 @@ if ($needHipHcc) {
if ((not -e $object) or ((stat($source))[9] > (stat($object))[9])) {
my $CMD = "$HCC $HCCFLAGS -I$HSA_PATH/include -I$HIP_PATH/include -Wall -c $source -o $object";
if ($verbose & 0x10) {
$CMD .= " -g -O2" ;
$CMD .= " -g -O0" ;
} else {
$CMD .= " -O3" ;
}
+1 -1
Parādīt failu
@@ -290,7 +290,7 @@ hipcc adds the necessary libraries for HIP as well as for the accelerator compil
### -lm Option
hcc does not add “-lm” by default. If you see errors about missing math functions at link time (e.g., "sqrt@@GLIBC_2.2.5"), ensure that “-lm” is in the link options.
hipcc adds -lm by default to the link command.
## Linking Code With Other Compilers
@@ -275,6 +275,29 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
return hipCUDAErrorTohipError(cerror);
}
inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){
cudaPointerAttributes cPA;
hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
if(err == hipSuccess){
switch (cPA.memoryType){
case cudaMemoryTypeDevice:
attributes->memoryType = hipMemoryTypeDevice; break;
case cudaMemoryTypeHost:
attributes->memoryType = hipMemoryTypeHost; break;
default:
return hipErrorUnknownSymbol;
}
attributes->device = cPA.device;
attributes->devicePointer = cPA.devicePointer;
attributes->hostPointer = cPA.hostPointer;
attributes->isManaged = 0;
attributes->allocationFlags = 0;
}
return err;
}
inline static hipError_t hipMemGetInfo( size_t* free, size_t* total)
{
return hipCUDAErrorTohipError(cudaMemGetInfo(free,total));
+8 -4
Parādīt failu
@@ -797,7 +797,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
#ifdef USE_ROCR_V2
#if USE_ROCR_V2
// Get Max memory clock frequency
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
DeviceErrorCheck(err);
@@ -1330,12 +1330,10 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
*pi = prop->regsPerBlock; break;
case hipDeviceAttributeClockRate:
*pi = prop->clockRate; break;
#ifdef USE_ROCR_V2
case hipDeviceAttributeMemoryClockRate:
*pi = prop->memoryClockRate; break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop->memoryBusWidth; break;
#endif
case hipDeviceAttributeMultiprocessorCount:
*pi = prop->multiProcessorCount; break;
case hipDeviceAttributeComputeMode:
@@ -1798,7 +1796,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
attributes->hostPointer = amPointerInfo._hostPointer;
attributes->devicePointer = amPointerInfo._devicePointer;
attributes->isManaged = 0;
if(attributes->memoryType == hipMemoryTypeHost){
attributes->hostPointer = ptr;
}
if(attributes->memoryType == hipMemoryTypeDevice){
attributes->devicePointer = ptr;
}
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
attributes->device = amPointerInfo._appId;
@@ -2370,6 +2373,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
#endif
/**
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
*/
//---
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
+24 -19
Parādīt failu
@@ -1,34 +1,39 @@
Tests uses CMAKE as teh build infrastructure.
Use :
> mkdir build
> cd build
> cmake ../src
> make
> make test
```
$ mkdir build
$ cd build
$ cmake ../src
$ make
$ make test
```
#-----
# How to add a new test;
### How to add a new test
# edit src/CMakeFiles to add the test:
edit src/CMakeFiles to add the test:
# add the executable and list of required CPP files, ie:
# make_test (EXE CPP_FILES)
> make_hip_executable (hipMemset hipMemset.cpp)
### add the executable and list of required CPP files, ie:
```
make_test (EXE CPP_FILES)
make_hip_executable (hipMemset hipMemset.cpp)
```
# Add to automated Test framework:
# make_test (TESTNAME ARGS)
> make_test(hipMemset " ")
### Add to automated Test framework:
```
make_test (TESTNAME ARGS)
make_test(hipMemset " ")
```
# Running tests:
### Running tests:
```
make test
```
# Run a specific test:
```
./hipMemset
```
+21 -21
Parādīt failu
@@ -38,7 +38,7 @@ size_t Nbytes = 0;
// Utility Functions:
//=================================================================================================
bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
{
return ((lhs.hostPointer == rhs.hostPointer) &&
(lhs.devicePointer == rhs.devicePointer) &&
@@ -50,7 +50,7 @@ bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &r
};
bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
{
return ! (lhs == rhs);
}
@@ -66,7 +66,7 @@ const char *memoryTypeToString(hipMemoryType memoryType)
}
void resetAttribs(hipPointerAttribute_t *attribs)
void resetAttribs(hipPointerAttribute_t *attribs)
{
attribs->hostPointer = (void*) (-1);
attribs->devicePointer = (void*) (-1);
@@ -77,9 +77,9 @@ void resetAttribs(hipPointerAttribute_t *attribs)
};
void printAttribs(const hipPointerAttribute_t *attribs)
void printAttribs(const hipPointerAttribute_t *attribs)
{
printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n",
printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n",
attribs->hostPointer,
attribs->devicePointer,
memoryTypeToString(attribs->memoryType),
@@ -90,7 +90,7 @@ void printAttribs(const hipPointerAttribute_t *attribs)
};
inline int zrand(int max)
inline int zrand(int max)
{
return rand() % max;
}
@@ -101,7 +101,7 @@ inline int zrand(int max)
//=================================================================================================
//--
//Run through a couple simple cases to test lookups and host pointer arithmetic:
void testSimple()
void testSimple()
{
printf ("\n");
printf ("===========================================================================\n");
@@ -135,22 +135,22 @@ void testSimple()
resetAttribs(&attribs2);
HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+100));
printf("getAttr:%-20s", "A_d+100"); printAttribs(&attribs2);
HIPASSERT(attribs == attribs2);
HIPASSERT((char*)attribs.devicePointer+100 == (char*)attribs2.devicePointer);
// Corner case at end of array:
resetAttribs(&attribs2);
HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+Nbytes-1));
printf("getAttr:%-20s", "A_d+NBytes-1"); printAttribs(&attribs2);
HIPASSERT(attribs == attribs2);
printf("getAttr:%-20s", "A_d+Nbytes-1"); printAttribs(&attribs2);
HIPASSERT((char*)attribs.devicePointer+Nbytes-1 == (char*)attribs2.devicePointer);
// Pointer just beyond array - must be invalid or at least a different pointer
resetAttribs(&attribs2);
e = hipPointerGetAttributes(&attribs2, A_d+Nbytes+1);
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
if (e != hipErrorInvalidValue) {
// We might have strayed into another pointer area.
printf("getAttr:%-20s", "A_d+NBytes"); printAttribs(&attribs2);
HIPASSERT(attribs.devicePointer != attribs2.devicePointer);
HIPASSERT((char*)attribs.devicePointer != (char*)attribs2.devicePointer);
}
@@ -174,26 +174,26 @@ void testSimple()
resetAttribs(&attribs2);
HIPCHECK( hipPointerGetAttributes(&attribs2, A_Pinned_h+Nbytes/2));
printf("getAttr:%-20s", "A_pinned_h+NBytes/2"); printAttribs(&attribs2);
HIPASSERT(attribs == attribs2);
HIPASSERT((char*)attribs.hostPointer+Nbytes/2 == (char*)attribs2.hostPointer);
hipFreeHost(A_Pinned_h);
e = hipPointerGetAttributes(&attribs, A_Pinned_h);
HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error.
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
// OS memory
printf ("\nOS-allocated memory (malloc)\n");
e = hipPointerGetAttributes(&attribs, A_OSAlloc_h);
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e));
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e));
HIPASSERT(e == hipErrorInvalidValue); // OS-allocated pointers should return hipErrorInvalidValue.
}
//---
//Reset the memory tracker (remove allocations from all known devices):
//This frees any memory allocated through the runtime.
//The routine will not release any
//The routine will not release any
void resetTracker ()
{
if (p_verbose & 0x1) {
@@ -232,8 +232,8 @@ void checkPointer(SuperPointerAttribute &ref, int major, int minor, void *pointe
HIPCHECK(e);
printf(" ref :: "); printAttribs(&ref._attrib);
printf(" getattr:: "); printAttribs(&attribs);
HIPASSERT(attribs == ref._attrib);
HIPASSERT(attribs != ref._attrib);
} else {
if (p_verbose & 0x1) {
printf("#%4d.%d GOOD:%p getattr :: ",major, minor, pointer); printAttribs(&attribs);
@@ -303,7 +303,7 @@ void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize)
size_t free, total;
HIPCHECK(hipSetDevice(i));
HIPCHECK(hipMemGetInfo(&free, &total));
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n",
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n",
i, free, (float)(free/1024.0/1024.0), totalDeviceAllocated[i], (float)(totalDeviceAllocated[i])/1024.0/1024.0, total, (float)(total/1024.0/1024.0));
HIPASSERT(free + totalDeviceAllocated[i] <= total);
}
@@ -432,9 +432,9 @@ void thread_noise_generator(int iters, size_t numBuffers, Dir addDir, Dir remove
//---
//Multi-thread test that is effective at catching locking errors in the alloc/dealloc/tracker.
//The query thread repeately requests information on the same block of memory.
//Meanwhile, the thread_noise_generator registers a large number of blocks, and
//Meanwhile, the thread_noise_generator registers a large number of blocks, and
//then unregisters them. This causes a large amount of rebalancing in the tree
//structure and will generate errors unless the locks in the tracker are preventing reading
//structure and will generate errors unless the locks in the tracker are preventing reading
//while writing.
void testMultiThreaded_2()
{