diff --git a/bin/hipcc b/bin/hipcc index ee58ba535a..29163d7c02 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -168,7 +168,7 @@ if ($needHipHcc) { if ((not -e $object) or ((stat($source))[9] > (stat($object))[9])) { my $CMD = "$HCC $HCCFLAGS -I$HSA_PATH/include -I$HIP_PATH/include -Wall -c $source -o $object"; if ($verbose & 0x10) { - $CMD .= " -g -O2" ; + $CMD .= " -g -O0" ; } else { $CMD .= " -O3" ; } diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index 9f1c7c67bd..cb599a5c4a 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -290,7 +290,7 @@ hipcc adds the necessary libraries for HIP as well as for the accelerator compil ### -lm Option -hcc does not add “-lm” by default. If you see errors about missing math functions at link time (e.g., "sqrt@@GLIBC_2.2.5"), ensure that “-lm” is in the link options. +hipcc adds -lm by default to the link command. ## Linking Code With Other Compilers diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index d0050a3734..a8408211b2 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -275,6 +275,29 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att return hipCUDAErrorTohipError(cerror); } + +inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){ + cudaPointerAttributes cPA; + hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); + if(err == hipSuccess){ + switch (cPA.memoryType){ + case cudaMemoryTypeDevice: + attributes->memoryType = hipMemoryTypeDevice; break; + case cudaMemoryTypeHost: + attributes->memoryType = hipMemoryTypeHost; break; + default: + return hipErrorUnknownSymbol; + } + attributes->device = cPA.device; + attributes->devicePointer = cPA.devicePointer; + attributes->hostPointer = cPA.hostPointer; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + } + return err; +} + + inline static hipError_t hipMemGetInfo( size_t* free, size_t* total) { return hipCUDAErrorTohipError(cudaMemGetInfo(free,total)); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index aa9759fc2b..945248c11f 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -797,7 +797,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) // Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size. prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; -#ifdef USE_ROCR_V2 +#if USE_ROCR_V2 // Get Max memory clock frequency //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); DeviceErrorCheck(err); @@ -1330,12 +1330,10 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) *pi = prop->regsPerBlock; break; case hipDeviceAttributeClockRate: *pi = prop->clockRate; break; -#ifdef USE_ROCR_V2 case hipDeviceAttributeMemoryClockRate: *pi = prop->memoryClockRate; break; case hipDeviceAttributeMemoryBusWidth: *pi = prop->memoryBusWidth; break; -#endif case hipDeviceAttributeMultiprocessorCount: *pi = prop->multiProcessorCount; break; case hipDeviceAttributeComputeMode: @@ -1798,7 +1796,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr) attributes->hostPointer = amPointerInfo._hostPointer; attributes->devicePointer = amPointerInfo._devicePointer; attributes->isManaged = 0; - + if(attributes->memoryType == hipMemoryTypeHost){ + attributes->hostPointer = ptr; + } + if(attributes->memoryType == hipMemoryTypeDevice){ + attributes->devicePointer = ptr; + } attributes->allocationFlags = amPointerInfo._appAllocationFlags; attributes->device = amPointerInfo._appId; @@ -2370,6 +2373,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind #endif /** * @result #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidMemcpyDirection, #hipErrorInvalidValue + * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. */ //--- hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) diff --git a/tests/README.md b/tests/README.md index 30c7173b35..96de22b558 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,34 +1,39 @@ Tests uses CMAKE as teh build infrastructure. Use : - -> mkdir build -> cd build -> cmake ../src -> make -> make test - +``` +$ mkdir build +$ cd build +$ cmake ../src +$ make +$ make test +``` #----- -# How to add a new test; +### How to add a new test -# edit src/CMakeFiles to add the test: +edit src/CMakeFiles to add the test: -# add the executable and list of required CPP files, ie: -# make_test (EXE CPP_FILES) -> make_hip_executable (hipMemset hipMemset.cpp) +### add the executable and list of required CPP files, ie: +``` +make_test (EXE CPP_FILES) +make_hip_executable (hipMemset hipMemset.cpp) +``` -# Add to automated Test framework: -# make_test (TESTNAME ARGS) -> make_test(hipMemset " ") +### Add to automated Test framework: +``` +make_test (TESTNAME ARGS) +make_test(hipMemset " ") +``` - -# Running tests: +### Running tests: +``` make test +``` # Run a specific test: +``` ./hipMemset - - +``` diff --git a/tests/src/hipPointerAttrib.cpp b/tests/src/hipPointerAttrib.cpp index 6928ec9a64..8bed5af869 100644 --- a/tests/src/hipPointerAttrib.cpp +++ b/tests/src/hipPointerAttrib.cpp @@ -38,7 +38,7 @@ size_t Nbytes = 0; // Utility Functions: //================================================================================================= -bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) +bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) { return ((lhs.hostPointer == rhs.hostPointer) && (lhs.devicePointer == rhs.devicePointer) && @@ -50,7 +50,7 @@ bool operator==(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &r }; -bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) +bool operator!=(const hipPointerAttribute_t &lhs, const hipPointerAttribute_t &rhs) { return ! (lhs == rhs); } @@ -66,7 +66,7 @@ const char *memoryTypeToString(hipMemoryType memoryType) } -void resetAttribs(hipPointerAttribute_t *attribs) +void resetAttribs(hipPointerAttribute_t *attribs) { attribs->hostPointer = (void*) (-1); attribs->devicePointer = (void*) (-1); @@ -77,9 +77,9 @@ void resetAttribs(hipPointerAttribute_t *attribs) }; -void printAttribs(const hipPointerAttribute_t *attribs) +void printAttribs(const hipPointerAttribute_t *attribs) { - printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n", + printf ("hostPointer:%p devicePointer:%p memoryType:%s deviceId:%d isManaged:%d allocationFlags:%u\n", attribs->hostPointer, attribs->devicePointer, memoryTypeToString(attribs->memoryType), @@ -90,7 +90,7 @@ void printAttribs(const hipPointerAttribute_t *attribs) }; -inline int zrand(int max) +inline int zrand(int max) { return rand() % max; } @@ -101,7 +101,7 @@ inline int zrand(int max) //================================================================================================= //-- //Run through a couple simple cases to test lookups and host pointer arithmetic: -void testSimple() +void testSimple() { printf ("\n"); printf ("===========================================================================\n"); @@ -135,22 +135,22 @@ void testSimple() resetAttribs(&attribs2); HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+100)); printf("getAttr:%-20s", "A_d+100"); printAttribs(&attribs2); - HIPASSERT(attribs == attribs2); + HIPASSERT((char*)attribs.devicePointer+100 == (char*)attribs2.devicePointer); // Corner case at end of array: resetAttribs(&attribs2); HIPCHECK( hipPointerGetAttributes(&attribs2, A_d+Nbytes-1)); - printf("getAttr:%-20s", "A_d+NBytes-1"); printAttribs(&attribs2); - HIPASSERT(attribs == attribs2); + printf("getAttr:%-20s", "A_d+Nbytes-1"); printAttribs(&attribs2); + HIPASSERT((char*)attribs.devicePointer+Nbytes-1 == (char*)attribs2.devicePointer); // Pointer just beyond array - must be invalid or at least a different pointer resetAttribs(&attribs2); e = hipPointerGetAttributes(&attribs2, A_d+Nbytes+1); - printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); if (e != hipErrorInvalidValue) { // We might have strayed into another pointer area. printf("getAttr:%-20s", "A_d+NBytes"); printAttribs(&attribs2); - HIPASSERT(attribs.devicePointer != attribs2.devicePointer); + HIPASSERT((char*)attribs.devicePointer != (char*)attribs2.devicePointer); } @@ -174,26 +174,26 @@ void testSimple() resetAttribs(&attribs2); HIPCHECK( hipPointerGetAttributes(&attribs2, A_Pinned_h+Nbytes/2)); printf("getAttr:%-20s", "A_pinned_h+NBytes/2"); printAttribs(&attribs2); - HIPASSERT(attribs == attribs2); + HIPASSERT((char*)attribs.hostPointer+Nbytes/2 == (char*)attribs2.hostPointer); hipFreeHost(A_Pinned_h); e = hipPointerGetAttributes(&attribs, A_Pinned_h); HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. - printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_d+NBytes", e, hipGetErrorString(e)); // OS memory printf ("\nOS-allocated memory (malloc)\n"); e = hipPointerGetAttributes(&attribs, A_OSAlloc_h); - printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e)); + printf("getAttr:%-20s err=%d (%s), neg-test expected\n", "A_OSAlloc_h", e, hipGetErrorString(e)); HIPASSERT(e == hipErrorInvalidValue); // OS-allocated pointers should return hipErrorInvalidValue. } //--- //Reset the memory tracker (remove allocations from all known devices): //This frees any memory allocated through the runtime. -//The routine will not release any +//The routine will not release any void resetTracker () { if (p_verbose & 0x1) { @@ -232,8 +232,8 @@ void checkPointer(SuperPointerAttribute &ref, int major, int minor, void *pointe HIPCHECK(e); printf(" ref :: "); printAttribs(&ref._attrib); printf(" getattr:: "); printAttribs(&attribs); - - HIPASSERT(attribs == ref._attrib); + + HIPASSERT(attribs != ref._attrib); } else { if (p_verbose & 0x1) { printf("#%4d.%d GOOD:%p getattr :: ",major, minor, pointer); printAttribs(&attribs); @@ -303,7 +303,7 @@ void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) size_t free, total; HIPCHECK(hipSetDevice(i)); HIPCHECK(hipMemGetInfo(&free, &total)); - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n", + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) clusterAllocTotalDevice=%lu (%4.2fMB) total=%zu (%4.2fMB)\n", i, free, (float)(free/1024.0/1024.0), totalDeviceAllocated[i], (float)(totalDeviceAllocated[i])/1024.0/1024.0, total, (float)(total/1024.0/1024.0)); HIPASSERT(free + totalDeviceAllocated[i] <= total); } @@ -432,9 +432,9 @@ void thread_noise_generator(int iters, size_t numBuffers, Dir addDir, Dir remove //--- //Multi-thread test that is effective at catching locking errors in the alloc/dealloc/tracker. //The query thread repeately requests information on the same block of memory. -//Meanwhile, the thread_noise_generator registers a large number of blocks, and +//Meanwhile, the thread_noise_generator registers a large number of blocks, and //then unregisters them. This causes a large amount of rebalancing in the tree -//structure and will generate errors unless the locks in the tracker are preventing reading +//structure and will generate errors unless the locks in the tracker are preventing reading //while writing. void testMultiThreaded_2() {