From 2ab19ca50555f89768deb13c87a59cdee6f31938 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 10 Jun 2016 20:12:46 -0500 Subject: [PATCH] NVCC improvements. - Complete translation tables for cudaError <-> hipError_t. - Remove some odd errors that were not correctly translated or not used. - Add HIPCHECK_API to test infrastructure. Used for negative testing an API ; if a mismatch occurs it shows the expected return error code. Can also print a warning rather than error. - Enable hipMemoryAllocate on NV system, and review error coded. - Add hipErrorName to nvcc. Change-Id: I680427dcf32a5796d5913cf9e7f3b4c6f6b91599 Conflicts: tests/src/CMakeLists.txt Bug fixes and improved docs for hipFree and hipHostFree. - Passing NULL pointer initialized runtime and return hipSuccess (not an error like before). - add negative test for this. (hipMemoryAllocate, improved) - Match NVCC errors for invalid pointers, add to test. - Update hipFree and hipHostFree docs. - hipGetDevicePointer always set *devicePointer=NULL, even for invalid flags. - Gate shared memory usage on specific HCC work-week. Change-Id: I533b4fd3280a3d6cdbf05eb768976f0c7506c012 --- include/hcc_detail/hip_runtime_api.h | 14 +++--- include/hip_runtime_api.h | 6 +-- include/nvcc_detail/hip_runtime_api.h | 65 ++++++++++++++++++--------- src/device_util.cpp | 13 ++++++ src/hip_hcc.cpp | 8 ++-- src/hip_memory.cpp | 16 +++++-- tests/src/CMakeLists.txt | 4 +- tests/src/hipMemoryAllocate.cpp | 29 ++++++------ tests/src/test_common.h | 18 ++++++++ 9 files changed, 120 insertions(+), 53 deletions(-) diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 6d32068481..3fd06aae38 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -764,28 +764,32 @@ hipError_t hipHostUnregister(void* hostPtr) ; /** * @brief Free memory allocated by the hcc hip memory allocation API. * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. * * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess, #hipErrorMemoryFree + * @return #hipSuccess + * @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated with hipHostMalloc) */ hipError_t hipFree(void* ptr); /** - * @brief Free memory allocated by the hcc hip host memory allocation API + * @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated.] * - * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess, #hipErrorMemoryFree + * @see hipHostFree */ hipError_t hipFreeHost(void* ptr) __attribute__((deprecated("use hipHostFree instead"))) ; /** * @brief Free memory allocated by the hcc hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. * * @param[in] ptr Pointer to memory to be freed - * @return #hipSuccess, #hipErrorMemoryFree + * @return #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with hipMalloc) */ hipError_t hipHostFree(void* ptr); diff --git a/include/hip_runtime_api.h b/include/hip_runtime_api.h index d468184218..e4719ec5b1 100644 --- a/include/hip_runtime_api.h +++ b/include/hip_runtime_api.h @@ -139,12 +139,12 @@ typedef struct hipPointerAttribute_t { * @ingroup Enumerations */ // Developer note - when updating these, update the hipErrorName and hipErrorString functions in NVCC and HCC paths +// Also update the hipCUDAErrorTohipError function in NVCC path. + typedef enum hipError_t { hipSuccess = 0 ///< Successful completion. ,hipErrorMemoryAllocation ///< Memory allocation error. - ,hipErrorMemoryFree ///< Memory free error. - ,hipErrorUnknownSymbol ///< Unknown symbol. - ,hipErrorOutOfResources ///< Out of resources error. + ,hipErrorLaunchOutOfResources ///< Out of resources error. ,hipErrorInvalidValue ///< One or more of the parameters passed to the API call is NULL or not in an acceptable range. ,hipErrorInvalidResourceHandle ///< Resource handle (hipEvent_t or hipStream_t) invalid. ,hipErrorInvalidDevice ///< DeviceID must be in range 0...#compute-devices. diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 5434555668..404ad38484 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -65,29 +65,50 @@ typedef cudaStream_t hipStream_t; inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { switch(cuError) { -case cudaSuccess: - return hipSuccess; -case cudaErrorMemoryAllocation: - return hipErrorMemoryAllocation; -case cudaErrorInvalidDevicePointer: -case cudaErrorInitializationError: - return hipErrorMemoryFree; -default: - return hipErrorUnknown; -} + case cudaSuccess : return hipSuccess; + case cudaErrorMemoryAllocation : return hipErrorMemoryAllocation ; + case cudaErrorLaunchOutOfResources : return hipErrorLaunchOutOfResources ; + case cudaErrorInvalidValue : return hipErrorInvalidValue ; + case cudaErrorInvalidResourceHandle : return hipErrorInvalidResourceHandle ; + case cudaErrorInvalidDevice : return hipErrorInvalidDevice ; + case cudaErrorInvalidMemcpyDirection : return hipErrorInvalidMemcpyDirection ; + case cudaErrorInvalidDevicePointer : return hipErrorInvalidDevicePointer ; + case cudaErrorInitializationError : return hipErrorInitializationError ; + case cudaErrorNoDevice : return hipErrorNoDevice ; + case cudaErrorNotReady : return hipErrorNotReady ; + case cudaErrorUnknown : return hipErrorUnknown ; + case cudaErrorPeerAccessNotEnabled : return hipErrorPeerAccessNotEnabled ; + case cudaErrorPeerAccessAlreadyEnabled : return hipErrorPeerAccessAlreadyEnabled ; + case cudaErrorHostMemoryAlreadyRegistered : return hipErrorHostMemoryAlreadyRegistered ; + case cudaErrorHostMemoryNotRegistered : return hipErrorHostMemoryNotRegistered ; + default : return hipErrorUnknown; // Note - translated error. +}; } + // TODO match the error enum names of hip and cuda inline static cudaError_t hipErrorToCudaError(hipError_t hError) { switch(hError) { -case hipSuccess: - return cudaSuccess; -case hipErrorMemoryAllocation: - return cudaErrorMemoryAllocation; -case hipErrorMemoryFree: - return cudaErrorInitializationError; -default: - return cudaErrorUnknown; -} + case hipSuccess : return cudaSuccess; + case hipErrorMemoryAllocation : return cudaErrorMemoryAllocation ; + case hipErrorLaunchOutOfResources : return cudaErrorLaunchOutOfResources ; + case hipErrorInvalidValue : return cudaErrorInvalidValue ; + case hipErrorInvalidResourceHandle : return cudaErrorInvalidResourceHandle ; + case hipErrorInvalidDevice : return cudaErrorInvalidDevice ; + case hipErrorInvalidMemcpyDirection : return cudaErrorInvalidMemcpyDirection ; + case hipErrorInvalidDevicePointer : return cudaErrorInvalidDevicePointer ; + case hipErrorInitializationError : return cudaErrorInitializationError ; + case hipErrorNoDevice : return cudaErrorNoDevice ; + case hipErrorNotReady : return cudaErrorNotReady ; + case hipErrorUnknown : return cudaErrorUnknown ; + case hipErrorPeerAccessNotEnabled : return cudaErrorPeerAccessNotEnabled ; + case hipErrorPeerAccessAlreadyEnabled : return cudaErrorPeerAccessAlreadyEnabled ; + case hipErrorRuntimeMemory : return cudaErrorUnknown ; // Does not exist in CUDA + case hipErrorRuntimeOther : return cudaErrorUnknown ; // Does not exist in CUDA + case hipErrorHostMemoryAlreadyRegistered : return cudaErrorHostMemoryAlreadyRegistered ; + case hipErrorHostMemoryNotRegistered : return cudaErrorHostMemoryNotRegistered ; + case hipErrorTbd : return cudaErrorUnknown; // Note - translated error. + default : return cudaErrorUnknown; // Note - translated error. +} } inline static cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { @@ -178,6 +199,10 @@ inline static const char* hipGetErrorString(hipError_t error){ return cudaGetErrorString( hipErrorToCudaError(error) ); } +inline static const char* hipGetErrorName(hipError_t error){ + return cudaGetErrorName( hipErrorToCudaError(error) ); +} + inline static hipError_t hipGetDeviceCount(int * count){ return hipCUDAErrorTohipError(cudaGetDeviceCount(count)); } @@ -329,7 +354,7 @@ inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attribut case cudaMemoryTypeHost: attributes->memoryType = hipMemoryTypeHost; break; default: - return hipErrorUnknownSymbol; + return hipErrorUnknown; } attributes->device = cPA.device; attributes->devicePointer = cPA.devicePointer; diff --git a/src/device_util.cpp b/src/device_util.cpp index cb97f04038..db46aa1f71 100644 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -27,6 +27,12 @@ THE SOFTWARE. using namespace hc::precise_math; #endif +#if __hcc_workweek__ > 16186 +#define USE_DYNAMIC_SHARED 1 +#else +#define USE_DYNAMIC_SHARED 0 +#endif + #define HIP_SQRT_2 1.41421356237 #define __hip_erfinva3 -0.140543331 @@ -606,6 +612,11 @@ __device__ float __hip_y1f(float x) return ret; } +#if __hcc_workweek__ > 16186 +#define USE_DYNAMIC_SHARED 1 +#else +#define USE_DYNAMIC_SHARED 0 +#endif __device__ float acosf(float x) { @@ -1634,10 +1645,12 @@ __host__ __device__ int max(int arg1, int arg2) return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); } +#if USE_DYNAMIC_SHARED __device__ __attribute__((address_space(3))) void* __get_dynamicgroupbaseptr() { return hc::get_dynamic_group_segment_base_pointer(); } +#endif diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 61b102e476..5aa80ff07d 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -100,7 +100,7 @@ hsa_agent_t g_cpu_agent; ihipSignal_t::ihipSignal_t() : _sig_id(0) { if (hsa_signal_create(0/*value*/, 0, NULL, &_hsa_signal) != HSA_STATUS_SUCCESS) { - throw ihipException(hipErrorOutOfResources); + throw ihipException(hipErrorRuntimeMemory); } //tprintf (DB_SIGNAL, " allocated hsa_signal=%lu\n", (_hsa_signal.handle)); } @@ -110,7 +110,7 @@ ihipSignal_t::~ihipSignal_t() { tprintf (DB_SIGNAL, " destroy hsa_signal #%lu (#%lu)\n", (_hsa_signal.handle), _sig_id); if (hsa_signal_destroy(_hsa_signal) != HSA_STATUS_SUCCESS) { - throw ihipException(hipErrorOutOfResources); + throw ihipException(hipErrorRuntimeOther); } }; @@ -1137,9 +1137,7 @@ const char *ihipErrorString(hipError_t hip_error) switch (hip_error) { case hipSuccess : return "hipSuccess"; case hipErrorMemoryAllocation : return "hipErrorMemoryAllocation"; - case hipErrorMemoryFree : return "hipErrorMemoryFree"; - case hipErrorUnknownSymbol : return "hipErrorUnknownSymbol"; - case hipErrorOutOfResources : return "hipErrorOutOfResources"; + case hipErrorLaunchOutOfResources : return "hipErrorLaunchOutOfResources"; case hipErrorInvalidValue : return "hipErrorInvalidValue"; case hipErrorInvalidResourceHandle : return "hipErrorInvalidResourceHandle"; case hipErrorInvalidDevice : return "hipErrorInvalidDevice"; diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index e7867d817c..9769229ab4 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -89,6 +89,8 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi hipError_t e = hipSuccess; + *devicePointer = NULL; + // Flags must be 0: if (flags != 0) { e = hipErrorInvalidValue; @@ -100,7 +102,6 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi *devicePointer = amPointerInfo._devicePointer; } else { e = hipErrorMemoryAllocation; - *devicePointer = NULL; } } return ihipLogStatus(e); @@ -530,6 +531,9 @@ hipError_t hipFree(void* ptr) hipStatus = hipSuccess; } } + } else { + // free NULL pointer succeeds and is common technique to initialize runtime + hipStatus = hipSuccess; } return ihipLogStatus(hipStatus); @@ -540,10 +544,11 @@ hipError_t hipHostFree(void* ptr) { HIP_INIT_API(ptr); - // TODO - ensure this pointer was created by hipMallocHost and not hipMalloc - std::call_once(hip_initialized, ihipInit); + // Synchronize to ensure all work has finished. + ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. - hipError_t hipStatus = hipErrorInvalidDevicePointer; + + hipError_t hipStatus = hipErrorInvalidValue; if (ptr) { hc::accelerator acc; hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); @@ -554,6 +559,9 @@ hipError_t hipHostFree(void* ptr) hipStatus = hipSuccess; } } + } else { + // free NULL pointer succeeds and is common technique to initialize runtime + hipStatus = hipSuccess; } return ihipLogStatus(hipStatus); diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index eb2fd603c2..d758d3c54f 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -200,6 +200,8 @@ make_hip_executable (hipTestMemcpyPin hipTestMemcpyPin.cpp) make_hip_executable (hipDynamicShared hipDynamicShared.cpp) make_hip_executable (hipTestDevice hipTestDevice.cpp) make_hip_executable (hipTestDeviceDouble hipTestDeviceDouble.cpp) + + make_test(hip_ballot " " ) make_test(hip_anyall " " ) make_test(hip_popc " " ) @@ -245,6 +247,7 @@ make_test(hipTestDevice " ") make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-serial" --tests 0x1) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-pyramid" --tests 0x4) make_named_test (hipMultiThreadDevice "hipMultiThreadDevice-nearzero" --tests 0x10) +make_test(hipMemoryAllocate " ") if (${HIP_MULTI_GPU}) make_test(hipPeerToPeer_simple " ") # use current device for copy, this fails. @@ -254,7 +257,6 @@ if (${HIP_MULTI_GPU}) endif() if (${HIP_PLATFORM} STREQUAL "hcc") - make_test(hipMemoryAllocate " ") make_test(hipFuncSetDevice " ") endif() diff --git a/tests/src/hipMemoryAllocate.cpp b/tests/src/hipMemoryAllocate.cpp index 30da822eaf..eeba7fd345 100644 --- a/tests/src/hipMemoryAllocate.cpp +++ b/tests/src/hipMemoryAllocate.cpp @@ -28,24 +28,23 @@ int main(){ hipHostMalloc((void**)&Bd, SIZE, hipHostMallocDefault); hipHostMalloc((void**)&Bm, SIZE, hipHostMallocMapped); hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped); - hipHostGetDevicePointer((void**)&Cd, C, SIZE); - HIPASSERT(hipFree(Ad) == hipSuccess); - HIPASSERT(hipHostFree(Ad) == hipErrorInvalidDevicePointer); + hipHostGetDevicePointer((void**)&Cd, C, 0/*flags*/); - HIPASSERT(hipFree(B) == hipErrorInvalidDevicePointer); - HIPASSERT(hipFree(Bd) == hipErrorInvalidDevicePointer); - HIPASSERT(hipFree(Bm) == hipErrorInvalidDevicePointer); - HIPASSERT(hipHostFree(Bd) == hipSuccess); - HIPASSERT(hipHostFree(Bm) == hipSuccess); + HIPCHECK_API(hipFree(Ad) , hipSuccess); + HIPCHECK_API(hipHostFree(Ad) , hipErrorInvalidValue); - HIPASSERT(hipFree(C) == hipErrorInvalidDevicePointer); - HIPASSERT(hipFree(Cd) == hipErrorInvalidDevicePointer); - HIPASSERT(hipHostFree(C) == hipSuccess); - HIPASSERT(hipHostFree(Cd) == hipErrorInvalidDevicePointer); - HIPASSERT(hipFree(Cd) == hipErrorInvalidDevicePointer); + HIPCHECK_API(hipFree(B) , hipErrorInvalidDevicePointer); // try to hipFree on malloced memory + HIPCHECK_API(hipFree(Bd) , hipErrorInvalidDevicePointer); + HIPCHECK_API(hipFree(Bm) , hipErrorInvalidDevicePointer); + HIPCHECK_API(hipHostFree(Bd) , hipSuccess); + HIPCHECK_API(hipHostFree(Bm) , hipSuccess); - HIPASSERT(hipFree(NULL) == hipErrorInvalidDevicePointer); - HIPASSERT(hipHostFree(NULL) == hipErrorInvalidDevicePointer); + HIPCHECK_API(hipFree(C) , hipErrorInvalidDevicePointer); + HIPCHECK_API(hipHostFree(C) , hipSuccess); + + + HIPCHECK_API(hipFree(NULL) , hipSuccess); + HIPCHECK_API(hipHostFree(NULL) , hipSuccess); passed(); } diff --git a/tests/src/test_common.h b/tests/src/test_common.h index ce85b3898a..47e5f63a5b 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -57,6 +57,12 @@ THE SOFTWARE. printf ("error: TEST FAILED\n%s", KNRM );\ abort(); +#define warn(...) \ + printf ("%swarn: ", KYEL);\ + printf (__VA_ARGS__);\ + printf ("\n");\ + printf ("warn: TEST WARNING\n%s", KNRM );\ + #define HIPCHECK(error) \ {\ @@ -76,6 +82,18 @@ THE SOFTWARE. __FILE__, __LINE__,KNRM); \ } + +#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \ +{\ + hipError_t _e = (API_CALL);\ + if (_e != (EXPECTED_ERROR) ) { \ + failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", \ + KRED, #API_CALL, _e, hipGetErrorName(_e), \ + EXPECTED_ERROR, hipGetErrorName(EXPECTED_ERROR), \ + __FILE__, __LINE__,KNRM); \ + }\ +} + // standard command-line variables: extern size_t N; extern char memsetval;