Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging
Šī revīzija ir iekļauta:
+1
-1
@@ -168,7 +168,7 @@ if ($needHipHcc) {
|
||||
if ((not -e $object) or ((stat($source))[9] > (stat($object))[9])) {
|
||||
my $CMD = "$HCC $HCCFLAGS -I$HSA_PATH/include -I$HIP_PATH/include -Wall -c $source -o $object";
|
||||
if ($verbose & 0x10) {
|
||||
$CMD .= " -g -O2" ;
|
||||
$CMD .= " -g -O0" ;
|
||||
} else {
|
||||
$CMD .= " -O3" ;
|
||||
}
|
||||
|
||||
@@ -290,7 +290,7 @@ hipcc adds the necessary libraries for HIP as well as for the accelerator compil
|
||||
|
||||
### -lm Option
|
||||
|
||||
hcc does not add -lm by default. If you see errors about missing math functions at link time (e.g., "sqrt@@GLIBC_2.2.5"), ensure that -lm is in the link options.
|
||||
hipcc adds -lm by default to the link command.
|
||||
|
||||
|
||||
## Linking Code With Other Compilers
|
||||
|
||||
@@ -275,6 +275,29 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
return hipCUDAErrorTohipError(cerror);
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){
|
||||
cudaPointerAttributes cPA;
|
||||
hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr));
|
||||
if(err == hipSuccess){
|
||||
switch (cPA.memoryType){
|
||||
case cudaMemoryTypeDevice:
|
||||
attributes->memoryType = hipMemoryTypeDevice; break;
|
||||
case cudaMemoryTypeHost:
|
||||
attributes->memoryType = hipMemoryTypeHost; break;
|
||||
default:
|
||||
return hipErrorUnknownSymbol;
|
||||
}
|
||||
attributes->device = cPA.device;
|
||||
attributes->devicePointer = cPA.devicePointer;
|
||||
attributes->hostPointer = cPA.hostPointer;
|
||||
attributes->isManaged = 0;
|
||||
attributes->allocationFlags = 0;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipMemGetInfo( size_t* free, size_t* total)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaMemGetInfo(free,total));
|
||||
|
||||
+8
-4
@@ -797,7 +797,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
|
||||
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
|
||||
|
||||
#ifdef USE_ROCR_V2
|
||||
#if USE_ROCR_V2
|
||||
// Get Max memory clock frequency
|
||||
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
|
||||
DeviceErrorCheck(err);
|
||||
@@ -1330,12 +1330,10 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
*pi = prop->regsPerBlock; break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop->clockRate; break;
|
||||
#ifdef USE_ROCR_V2
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop->memoryClockRate; break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop->memoryBusWidth; break;
|
||||
#endif
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop->multiProcessorCount; break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
@@ -1798,7 +1796,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
|
||||
attributes->hostPointer = amPointerInfo._hostPointer;
|
||||
attributes->devicePointer = amPointerInfo._devicePointer;
|
||||
attributes->isManaged = 0;
|
||||
|
||||
if(attributes->memoryType == hipMemoryTypeHost){
|
||||
attributes->hostPointer = ptr;
|
||||
}
|
||||
if(attributes->memoryType == hipMemoryTypeDevice){
|
||||
attributes->devicePointer = ptr;
|
||||
}
|
||||
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
|
||||
attributes->device = amPointerInfo._appId;
|
||||
|
||||
@@ -2370,6 +2373,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
|
||||
#endif
|
||||
/**
|
||||
* @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue
|
||||
* @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies.
|
||||
*/
|
||||
//---
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
|
||||
+24
-19
@@ -1,34 +1,39 @@
|
||||
Tests uses CMAKE as teh build infrastructure.
|
||||
|
||||
Use :
|
||||
|
||||
> mkdir build
|
||||
> cd build
|
||||
> cmake ../src
|
||||
> make
|
||||
> make test
|
||||
|
||||
```
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake ../src
|
||||
$ make
|
||||
$ make test
|
||||
```
|
||||
|
||||
#-----
|
||||
# How to add a new test;
|
||||
### How to add a new test
|
||||
|
||||
# edit src/CMakeFiles to add the test:
|
||||
edit src/CMakeFiles to add the test:
|
||||
|
||||
# add the executable and list of required CPP files, ie:
|
||||
# make_test (EXE CPP_FILES)
|
||||
> make_hip_executable (hipMemset hipMemset.cpp)
|
||||
### add the executable and list of required CPP files, ie:
|
||||
```
|
||||
make_test (EXE CPP_FILES)
|
||||
make_hip_executable (hipMemset hipMemset.cpp)
|
||||
```
|
||||
|
||||
# Add to automated Test framework:
|
||||
# make_test (TESTNAME ARGS)
|
||||
> make_test(hipMemset " ")
|
||||
### Add to automated Test framework:
|
||||
```
|
||||
make_test (TESTNAME ARGS)
|
||||
make_test(hipMemset " ")
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Running tests:
|
||||
### Running tests:
|
||||
```
|
||||
make test
|
||||
```
|
||||
|
||||
# Run a specific test:
|
||||
```
|
||||
./hipMemset
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ size_t Nbytes = 0;
|
||||
// Utility Functions:
|
||||
//=================================================================================================
|
||||
|
||||
bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
|
||||
bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
|
||||
{
|
||||
return ((lhs.hostPointer == rhs.hostPointer) &&
|
||||
(lhs.devicePointer == rhs.devicePointer) &&
|
||||
@@ -50,7 +50,7 @@ bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &r
|
||||
};
|
||||
|
||||
|
||||
bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
|
||||
bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs)
|
||||
{
|
||||
return ! (lhs == rhs);
|
||||
}
|
||||
@@ -66,7 +66,7 @@ const char *memoryTypeToString(hipMemoryType memoryType)
|
||||
}
|
||||
|
||||
|
||||
void resetAttribs(hipPointerAttribute_t *attribs)
|
||||
void resetAttribs(hipPointerAttribute_t *attribs)
|
||||
{
|
||||
attribs->hostPointer = (void*) (-1);
|
||||
attribs->devicePointer = (void*) (-1);
|
||||
@@ -77,9 +77,9 @@ void resetAttribs(hipPointerAttribute_t *attribs)
|
||||
};
|
||||
|
||||
|
||||
void printAttribs(const hipPointerAttribute_t *attribs)
|
||||
void printAttribs(const hipPointerAttribute_t *attribs)
|
||||
{
|
||||
printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n",
|
||||
printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n",
|
||||
attribs->hostPointer,
|
||||
attribs->devicePointer,
|
||||
memoryTypeToString(attribs->memoryType),
|
||||
@@ -90,7 +90,7 @@ void printAttribs(const hipPointerAttribute_t *attribs)
|
||||
};
|
||||
|
||||
|
||||
inline int zrand(int max)
|
||||
inline int zrand(int max)
|
||||
{
|
||||
return rand() % max;
|
||||
}
|
||||
@@ -101,7 +101,7 @@ inline int zrand(int max)
|
||||
//=================================================================================================
|
||||
//--
|
||||
//Run through a couple simple cases to test lookups and host pointer arithmetic:
|
||||
void testSimple()
|
||||
void testSimple()
|
||||
{
|
||||
printf ("\n");
|
||||
printf ("===========================================================================\n");
|
||||
@@ -135,22 +135,22 @@ void testSimple()
|
||||
resetAttribs(&attribs2);
|
||||
HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+100));
|
||||
printf("getAttr:%-20s", "A_d+100"); printAttribs(&attribs2);
|
||||
HIPASSERT(attribs == attribs2);
|
||||
HIPASSERT((char*)attribs.devicePointer+100 == (char*)attribs2.devicePointer);
|
||||
|
||||
// Corner case at end of array:
|
||||
resetAttribs(&attribs2);
|
||||
HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+Nbytes-1));
|
||||
printf("getAttr:%-20s", "A_d+NBytes-1"); printAttribs(&attribs2);
|
||||
HIPASSERT(attribs == attribs2);
|
||||
printf("getAttr:%-20s", "A_d+Nbytes-1"); printAttribs(&attribs2);
|
||||
HIPASSERT((char*)attribs.devicePointer+Nbytes-1 == (char*)attribs2.devicePointer);
|
||||
|
||||
// Pointer just beyond array - must be invalid or at least a different pointer
|
||||
resetAttribs(&attribs2);
|
||||
e = hipPointerGetAttributes(&attribs2, A_d+Nbytes+1);
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
|
||||
if (e != hipErrorInvalidValue) {
|
||||
// We might have strayed into another pointer area.
|
||||
printf("getAttr:%-20s", "A_d+NBytes"); printAttribs(&attribs2);
|
||||
HIPASSERT(attribs.devicePointer != attribs2.devicePointer);
|
||||
HIPASSERT((char*)attribs.devicePointer != (char*)attribs2.devicePointer);
|
||||
}
|
||||
|
||||
|
||||
@@ -174,26 +174,26 @@ void testSimple()
|
||||
resetAttribs(&attribs2);
|
||||
HIPCHECK( hipPointerGetAttributes(&attribs2, A_Pinned_h+Nbytes/2));
|
||||
printf("getAttr:%-20s", "A_pinned_h+NBytes/2"); printAttribs(&attribs2);
|
||||
HIPASSERT(attribs == attribs2);
|
||||
HIPASSERT((char*)attribs.hostPointer+Nbytes/2 == (char*)attribs2.hostPointer);
|
||||
|
||||
|
||||
hipFreeHost(A_Pinned_h);
|
||||
e = hipPointerGetAttributes(&attribs, A_Pinned_h);
|
||||
HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error.
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e));
|
||||
|
||||
|
||||
// OS memory
|
||||
printf ("\nOS-allocated memory (malloc)\n");
|
||||
e = hipPointerGetAttributes(&attribs, A_OSAlloc_h);
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e));
|
||||
printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e));
|
||||
HIPASSERT(e == hipErrorInvalidValue); // OS-allocated pointers should return hipErrorInvalidValue.
|
||||
}
|
||||
|
||||
//---
|
||||
//Reset the memory tracker (remove allocations from all known devices):
|
||||
//This frees any memory allocated through the runtime.
|
||||
//The routine will not release any
|
||||
//The routine will not release any
|
||||
void resetTracker ()
|
||||
{
|
||||
if (p_verbose & 0x1) {
|
||||
@@ -232,8 +232,8 @@ void checkPointer(SuperPointerAttribute &ref, int major, int minor, void *pointe
|
||||
HIPCHECK(e);
|
||||
printf(" ref :: "); printAttribs(&ref._attrib);
|
||||
printf(" getattr:: "); printAttribs(&attribs);
|
||||
|
||||
HIPASSERT(attribs == ref._attrib);
|
||||
|
||||
HIPASSERT(attribs != ref._attrib);
|
||||
} else {
|
||||
if (p_verbose & 0x1) {
|
||||
printf("#%4d.%d GOOD:%p getattr :: ",major, minor, pointer); printAttribs(&attribs);
|
||||
@@ -303,7 +303,7 @@ void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize)
|
||||
size_t free, total;
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
HIPCHECK(hipMemGetInfo(&free, &total));
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n",
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n",
|
||||
i, free, (float)(free/1024.0/1024.0), totalDeviceAllocated[i], (float)(totalDeviceAllocated[i])/1024.0/1024.0, total, (float)(total/1024.0/1024.0));
|
||||
HIPASSERT(free + totalDeviceAllocated[i] <= total);
|
||||
}
|
||||
@@ -432,9 +432,9 @@ void thread_noise_generator(int iters, size_t numBuffers, Dir addDir, Dir remove
|
||||
//---
|
||||
//Multi-thread test that is effective at catching locking errors in the alloc/dealloc/tracker.
|
||||
//The query thread repeately requests information on the same block of memory.
|
||||
//Meanwhile, the thread_noise_generator registers a large number of blocks, and
|
||||
//Meanwhile, the thread_noise_generator registers a large number of blocks, and
|
||||
//then unregisters them. This causes a large amount of rebalancing in the tree
|
||||
//structure and will generate errors unless the locks in the tracker are preventing reading
|
||||
//structure and will generate errors unless the locks in the tracker are preventing reading
|
||||
//while writing.
|
||||
void testMultiThreaded_2()
|
||||
{
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user