From bd2327927108e226315251b0fb025e4e6c385199 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 4 Mar 2016 06:00:04 -0600 Subject: [PATCH 01/20] v2: Fixed implementation of hipMemcpyDefault flag --- src/hip_hcc.cpp | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index e35b911bac..b92762f24a 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2412,21 +2412,33 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp } else { if (kind == hipMemcpyDefault) { - std::cout<<"hipMemcpyDefault"<getSignal(); hsa_signal_store_relaxed(ihip_signal->_hsa_signal, 1); From 4b271ec013cd21f593e38c009af626342df68a37 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 5 Mar 2016 19:30:29 -0600 Subject: [PATCH 02/20] Added canMapHostMemory to hipDeviceProp --- include/hip_runtime_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hip_runtime_api.h b/include/hip_runtime_api.h index 61e2b17407..f73ebeeb9b 100644 --- a/include/hip_runtime_api.h +++ b/include/hip_runtime_api.h @@ -97,6 +97,7 @@ typedef struct hipDeviceProp_t { int pciDeviceID; ///< PCI Device ID. size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not. + int canMapHostMemory; ///< Check whether HIP can map host memory } hipDeviceProp_t; From af4edd277f24e4b50891d1a93e91047774b7e0e4 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 5 Mar 2016 13:06:37 -0600 Subject: [PATCH 03/20] Added canMapHostMemory feature --- include/nvcc_detail/hip_runtime_api.h | 1 + tests/src/hipHostAlloc.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/src/hipHostAlloc.cpp diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index a8408211b2..1413d735f6 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -177,6 +177,7 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev p_prop->l2CacheSize = cdprop.l2CacheSize ; p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ; p_prop->computeMode = cdprop.computeMode ; + p_prop->canMapHostMemory = cdprop.canMapHostMemory; // Same as clock-rate: p_prop->clockInstructionRate = cdprop.clockRate; diff --git a/tests/src/hipHostAlloc.cpp b/tests/src/hipHostAlloc.cpp new file mode 100644 index 0000000000..b8acdeb0b3 --- /dev/null +++ b/tests/src/hipHostAlloc.cpp @@ -0,0 +1,20 @@ +#include +#include"test_common.h" + +__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +Cd[tx] = Ad[tx] + Bd[tx]; +} + +int main(){ +float *A, *B, *C; +float *Ad, *Bd, *Cd; + +hipDeviceProp_t prop; +int device; +hipGetDevice(&device); +hipDeviceGetProperties(&prop, device); +if(prop.canMapHostMemory != 1){ +std::cout<<"Exiting.."< Date: Sat, 5 Mar 2016 13:08:57 -0600 Subject: [PATCH 04/20] Revert "Added canMapHostMemory feature" This reverts commit af4edd277f24e4b50891d1a93e91047774b7e0e4. --- include/nvcc_detail/hip_runtime_api.h | 1 - tests/src/hipHostAlloc.cpp | 20 -------------------- 2 files changed, 21 deletions(-) delete mode 100644 tests/src/hipHostAlloc.cpp diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 1413d735f6..a8408211b2 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -177,7 +177,6 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev p_prop->l2CacheSize = cdprop.l2CacheSize ; p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ; p_prop->computeMode = cdprop.computeMode ; - p_prop->canMapHostMemory = cdprop.canMapHostMemory; // Same as clock-rate: p_prop->clockInstructionRate = cdprop.clockRate; diff --git a/tests/src/hipHostAlloc.cpp b/tests/src/hipHostAlloc.cpp deleted file mode 100644 index b8acdeb0b3..0000000000 --- a/tests/src/hipHostAlloc.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include"test_common.h" - -__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){ -int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; -Cd[tx] = Ad[tx] + Bd[tx]; -} - -int main(){ -float *A, *B, *C; -float *Ad, *Bd, *Cd; - -hipDeviceProp_t prop; -int device; -hipGetDevice(&device); -hipDeviceGetProperties(&prop, device); -if(prop.canMapHostMemory != 1){ -std::cout<<"Exiting.."< Date: Sat, 5 Mar 2016 13:15:07 -0600 Subject: [PATCH 05/20] v2 Added canHostMapMemory --- include/nvcc_detail/hip_runtime_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index a8408211b2..1413d735f6 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -177,6 +177,7 @@ inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int dev p_prop->l2CacheSize = cdprop.l2CacheSize ; p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ; p_prop->computeMode = cdprop.computeMode ; + p_prop->canMapHostMemory = cdprop.canMapHostMemory; // Same as clock-rate: p_prop->clockInstructionRate = cdprop.clockRate; From f479531be588796a6290efcd12eb2719b13d6d03 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 5 Mar 2016 13:58:56 -0600 Subject: [PATCH 06/20] Added hipHostAlloc feature for CUDA --- bin/hipify | 7 ++++- include/nvcc_detail/hip_runtime_api.h | 13 +++++++++ tests/src/CMakeLists.txt | 4 +-- tests/src/hipHostAlloc.cpp | 42 +++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tests/src/hipHostAlloc.cpp diff --git a/bin/hipify b/bin/hipify index c49598f4ef..811f6ac036 100755 --- a/bin/hipify +++ b/bin/hipify @@ -286,7 +286,12 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaMallocHost\b/hipMallocHost/g; $ft{'mem'} += s/\bcudaFree\b/hipFree/g; $ft{'mem'} += s/\bcudaFreeHost\b/hipFreeHost/g; - + $ft{'mem'} += s/\bcudaHostAlloc\b/hipHostAlloc/g; + $ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g; + $ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostAllocDefault/g; + $ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostAllocPortable/g; + $ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostAllocMapped/g; + $ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostAllocWriteCombined/g; #-------- diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 1413d735f6..1d99ba28f2 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -50,6 +50,10 @@ hipMemcpyHostToHost } hipTextureFilterMode;*/ #define hipFilterModePoint cudaFilterModePoint +#define hipHostAllocDefault cudaHostAllocDefault +#define hipHostAllocPortable cudaHostAllocPortable +#define hipHostAllocMapped cudaHostAllocMapped +#define hipHostAllocWriteCombined cudaHostAllocWriteCombined typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; @@ -115,6 +119,15 @@ inline static hipError_t hipFree(void* ptr) { inline static hipError_t hipMallocHost(void** ptr, size_t size) { return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); } + +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags){ + return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + +inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags){ + return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); +} + inline static hipError_t hipFreeHost(void* ptr) { return hipCUDAErrorTohipError(cudaFreeHost(ptr)); } diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 44becb0ed3..4454ac73fb 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -126,7 +126,7 @@ make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrin make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp) make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp) - +make_hip_executable (hipHostAlloc hipHostAlloc.cpp) make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) @@ -146,7 +146,7 @@ make_test(hipEnvVarDriver " " ) make_test(hipPointerAttrib " " ) make_test(hipMultiThreadStreams1 " " ) make_test(hipMultiThreadStreams2 " " ) - +make_test(hipHostAlloc " ") make_test(hipMemcpy " " ) make_test(hipMemcpyAsync " " ) diff --git a/tests/src/hipHostAlloc.cpp b/tests/src/hipHostAlloc.cpp new file mode 100644 index 0000000000..7ad6f4b718 --- /dev/null +++ b/tests/src/hipHostAlloc.cpp @@ -0,0 +1,42 @@ +#include"test_common.h" + +#define LEN 1024*1024 +#define SIZE LEN*sizeof(float) + +__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +Cd[tx] = Ad[tx] + Bd[tx]; +} + +int main(){ +float *A, *B, *C; +float *Ad, *Bd, *Cd; + +hipDeviceProp_t prop; +int device; +HIPCHECK(hipGetDevice(&device)); +HIPCHECK(hipDeviceGetProperties(&prop, device)); +if(prop.canMapHostMemory != 1){ +std::cout<<"Exiting..."< Date: Sat, 5 Mar 2016 15:57:56 -0600 Subject: [PATCH 07/20] Added hipHostAlloc with hipHostAllocMapped flag --- include/hcc_detail/hip_runtime_api.h | 24 ++++++++++ src/hip_hcc.cpp | 67 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 5fe398b84c..fff6107d6b 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -56,6 +56,11 @@ extern "C" { #define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. +#define hipHostAllocDefault 0x0 +#define hipHostAllocPortable 0x1 +#define hipHostAllocMapped 0x2 +#define hipHostAllocWriteCombined 0x4 + /** * @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored. */ @@ -662,6 +667,25 @@ hipError_t hipMalloc(void** ptr, size_t size) ; */ hipError_t hipMallocHost(void** ptr, size_t size) ; +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * @param[in] flags Type of host memory allocation + * @return Error code + */ +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) ; + +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostAlloc + * + * @param[out] dstPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostAlloc + * @param[in] size Requested memory size + * @return Error code + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size) ; /** * @brief Free memory allocated by the hcc hip memory allocation API. diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index b92762f24a..b09f622b70 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2000,6 +2000,73 @@ hipError_t hipMallocHost(void** ptr, size_t sizeBytes) return ihipLogStatus(hip_status); } +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){ + std::call_once(hip_initialized, ihipInit); + + hipError_t hip_status = hipSuccess; + + auto device = ihipGetTlsDefaultDevice(); + + if(device){ + if(flags | hipHostAllocDefault){ + const unsigned am_flags = amHostPinned; + + *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + if(sizeBytes && (*ptr == NULL)){ + hip_status = hipErrorMemoryAllocation; + }else{ +#if USE_AM_TRACKER + hc::am_memtracker_update(*ptr, device->_device_index, 0); +#endif + } + tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); + } + if(flags | hipHostAllocMapped && device->_props.canMapHostMemory == 1){ + const unsigned am_flags = amHostPinned; + + *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + if(sizeBytes && (*ptr == NULL)){ + hip_status = hipErrorMemoryAllocation; + }else{ +#if USE_AM_TRACKER + hc::am_memtracker_update(*ptr, device->_device_index, 0); + void *srcPtr; + hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr); + assert(hsa_status == HSA_STATUS_SUCCESS); + hc::am_memtracker_add(srcPtr, sizeBytes, device->_acc, false); +#endif + } + tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); + } + } + return ihipLogStatus(hip_status); +} + +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){ + std::call_once(hip_initialized, ihipInit); + + hipError_t hip_status = hipSuccess; + + if(hstPtr == NULL){ + hip_status = hipErrorInvalidValue; + }else{ + +#if USE_AM_TRACKER + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hstPtr); + if(status == AM_SUCCESS){ + *devPtr = amPointerInfo._devicePointer; + if(devPtr == NULL){ + hip_status = hipErrorMemoryAllocation; + } + } +#endif + tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *devPtr); + } + return ihipLogStatus(hip_status); +} + //--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { From d3ba2b978275e772e33c2094348485cb3e123de0 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sun, 6 Mar 2016 08:31:04 -0600 Subject: [PATCH 08/20] corrected hipDeviceGetProperties to hipGetDeviceProperties - not docs --- RELEASE.md | 2 +- bin/hipify | 2 +- docs/markdown/hip_kernel_language.md | 2 +- docs/markdown/hip_porting_guide.md | 4 ++-- include/hcc_detail/hip_runtime_api.h | 4 ++-- include/nvcc_detail/hip_runtime_api.h | 2 +- samples/0_Intro/bit_extract/bit_extract.cpp | 2 +- samples/0_Intro/square/square.hipref.cpp | 2 +- samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- src/hip_hcc.cpp | 2 +- tests/src/hipEnvVar.cpp | 2 +- tests/src/hipGetDeviceAttribute.cpp | 2 +- tests/src/hipHcc.cpp | 2 +- tests/src/hipHostAlloc.cpp | 2 +- tests/src/hipSimpleAtomicsTest.cpp | 2 +- tests/src/hip_anyall.cpp | 2 +- tests/src/hip_ballot.cpp | 2 +- tests/src/hip_brev.cpp | 2 +- tests/src/hip_clz.cpp | 2 +- tests/src/hip_ffs.cpp | 2 +- tests/src/hip_popc.cpp | 2 +- tests/src/test_common.cpp | 2 +- util/gedit/hip.lang | 4 ++-- util/vim/hip.vim | 2 +- 24 files changed, 27 insertions(+), 27 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index ae0a0d2b4e..055dd0a60d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -23,7 +23,7 @@ Date: 2016.02.18 - Update Runtime Documentation. - Improve implementations of cross-lane operations (_ballot, _any, _all). - Provide shuffle intrinsics (performance optimization in-progress). -- Support hipDeviceAttribute for querying "one-shot" device attributes, as an alternative to hipDeviceGetProperties. +- Support hipDeviceAttribute for querying "one-shot" device attributes, as an alternative to hipGetDeviceProperties. =================================================================================================== diff --git a/bin/hipify b/bin/hipify index 811f6ac036..8ff04030fd 100755 --- a/bin/hipify +++ b/bin/hipify @@ -350,7 +350,7 @@ while (@ARGV) { #-------- # Device $ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g; - $ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipDeviceGetProperties/g; + $ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipGetDeviceProperties/g; # Attribute $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g; diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md index 4d3e72ce65..b54cf16613 100644 --- a/docs/markdown/hip_kernel_language.md +++ b/docs/markdown/hip_kernel_language.md @@ -427,7 +427,7 @@ Nvidia devices implement the timer as a per-compute-unit clock that increments o To obtain the clock frequency, use the hipDeviceProp_t.clockInstructionRate field: ``` -hipDeviceGetProperties(&deviceProps, deviceId); +hipGetDeviceProperties(&deviceProps, deviceId); // Compute time in ms--device_ticks is based on values reported from clock() device function float time = device_ticks / (float)deviceProps.clockInstructionRate; ``` diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index f4b1f8a7e4..6376593a98 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -215,10 +215,10 @@ For host code, the `__HIP_ARCH__*` defines are set to 0. You should only use the ### Device-Architecture Properties -Host code should query the architecture feature flags in the device properties that hipDeviceGetProperties returns, rather than testing the "major" and "minor" fields directly: +Host code should query the architecture feature flags in the device properties that hipGetDeviceProperties returns, rather than testing the "major" and "minor" fields directly: ``` -hipDeviceGetProperties(&deviceProp, device); +hipGetDeviceProperties(&deviceProp, device); //if ((deviceProp.major == 1 && deviceProp.minor < 2)) // non-portable if (deviceProp.arch.hasSharedInt32Atomics) { // portable HIP feature query // has shared int32 atomic operations ... diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index fff6107d6b..1588f73a5a 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -252,9 +252,9 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) * @param [out] prop written with device properties * @param [in] device which device to query for information * - * Populates hipDeviceGetProperties with information for the specified device. + * Populates hipGetDeviceProperties with information for the specified device. */ -hipError_t hipDeviceGetProperties(hipDeviceProp_t* prop, int device); +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int device); diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 1d99ba28f2..98e7ca004c 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -167,7 +167,7 @@ inline static hipError_t hipMemset(void* devPtr,int value, size_t count) { return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); } -inline static hipError_t hipDeviceGetProperties(hipDeviceProp_t *p_prop, int device) +inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int device) { cudaDeviceProp cdprop; cudaError_t cerror; diff --git a/samples/0_Intro/bit_extract/bit_extract.cpp b/samples/0_Intro/bit_extract/bit_extract.cpp index d32b65c15d..14b5be66d4 100644 --- a/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/samples/0_Intro/bit_extract/bit_extract.cpp @@ -60,7 +60,7 @@ int main(int argc, char *argv[]) int deviceId; CHECK (hipGetDevice(&deviceId)); hipDeviceProp_t props; - CHECK(hipDeviceGetProperties(&props, deviceId)); + CHECK(hipGetDeviceProperties(&props, deviceId)); printf ("info: running on device #%d %s\n", deviceId, props.name); diff --git a/samples/0_Intro/square/square.hipref.cpp b/samples/0_Intro/square/square.hipref.cpp index ed2a938279..5d53a8d584 100644 --- a/samples/0_Intro/square/square.hipref.cpp +++ b/samples/0_Intro/square/square.hipref.cpp @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) size_t Nbytes = N * sizeof(float); hipDeviceProp_t props; - CHECK(hipDeviceGetProperties(&props, 0/*deviceID*/)); + CHECK(hipGetDeviceProperties(&props, 0/*deviceID*/)); printf ("info: running on device %s\n", props.name); printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index 824ab17d37..146d17e015 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -73,7 +73,7 @@ void printDeviceProp (int deviceId) cout << setw(w1) << "device#" << deviceId << endl; hipDeviceProp_t props; - HIPCHECK(hipDeviceGetProperties(&props, deviceId)); + HIPCHECK(hipGetDeviceProperties(&props, deviceId)); cout << setw(w1) << "Name: " << props.name << endl; cout << setw(w1) << "pciBusID: " << props.pciBusID << endl; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index b09f622b70..e453fc2069 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1380,7 +1380,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) * @bug HCC always returns 0 for regsPerBlock * @bug HCC always returns 0 for l2CacheSize */ -hipError_t hipDeviceGetProperties(hipDeviceProp_t* props, int device) +hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { std::call_once(hip_initialized, ihipInit); diff --git a/tests/src/hipEnvVar.cpp b/tests/src/hipEnvVar.cpp index c1b6589fe2..229fa390c2 100644 --- a/tests/src/hipEnvVar.cpp +++ b/tests/src/hipEnvVar.cpp @@ -108,7 +108,7 @@ int main(int argc, char **argv) hipSetDevice(device); hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, device); + hipGetDeviceProperties(&devProp, device); if (devProp.major < 1) { printf("%d does not support HIP\n", device); return -1; diff --git a/tests/src/hipGetDeviceAttribute.cpp b/tests/src/hipGetDeviceAttribute.cpp index 0073dfeed7..51bf29f9e6 100644 --- a/tests/src/hipGetDeviceAttribute.cpp +++ b/tests/src/hipGetDeviceAttribute.cpp @@ -52,7 +52,7 @@ int main(int argc, char *argv[]) int deviceId; CHECK (hipGetDevice(&deviceId)); hipDeviceProp_t props; - CHECK(hipDeviceGetProperties(&props, deviceId)); + CHECK(hipGetDeviceProperties(&props, deviceId)); printf ("info: running on device #%d %s\n", deviceId, props.name); CHECK(test_hipDeviceGetAttribute(deviceId, hipDeviceAttributeMaxThreadsPerBlock, props.maxThreadsPerBlock)); diff --git a/tests/src/hipHcc.cpp b/tests/src/hipHcc.cpp index 2a8ae9b804..b3580db660 100644 --- a/tests/src/hipHcc.cpp +++ b/tests/src/hipHcc.cpp @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) int deviceId; CHECK (hipGetDevice(&deviceId)); hipDeviceProp_t props; - CHECK(hipDeviceGetProperties(&props, deviceId)); + CHECK(hipGetDeviceProperties(&props, deviceId)); printf ("info: running on device #%d %s\n", deviceId, props.name); #ifdef __HCC__ diff --git a/tests/src/hipHostAlloc.cpp b/tests/src/hipHostAlloc.cpp index 7ad6f4b718..a6401e469d 100644 --- a/tests/src/hipHostAlloc.cpp +++ b/tests/src/hipHostAlloc.cpp @@ -15,7 +15,7 @@ float *Ad, *Bd, *Cd; hipDeviceProp_t prop; int device; HIPCHECK(hipGetDevice(&device)); -HIPCHECK(hipDeviceGetProperties(&prop, device)); +HIPCHECK(hipGetDeviceProperties(&prop, device)); if(prop.canMapHostMemory != 1){ std::cout<<"Exiting..."< GPU device has %d Multi-Processors, " diff --git a/tests/src/hip_anyall.cpp b/tests/src/hip_anyall.cpp index 52a2a13db9..21e24d6443 100644 --- a/tests/src/hip_anyall.cpp +++ b/tests/src/hip_anyall.cpp @@ -41,7 +41,7 @@ __global__ void int main(int argc, char *argv[]) { int warpSize, pshift; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); if(strncmp(devProp.name,"Fiji",1)==0) { warpSize =64; pshift =6; diff --git a/tests/src/hip_ballot.cpp b/tests/src/hip_ballot.cpp index 17e86a12ad..5af4c32d7b 100644 --- a/tests/src/hip_ballot.cpp +++ b/tests/src/hip_ballot.cpp @@ -21,7 +21,7 @@ __global__ void int main(int argc, char *argv[]) { int warpSize, pshift; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); if(strncmp(devProp.name,"Fiji",1)==0) {warpSize = 64; pshift =6;} diff --git a/tests/src/hip_brev.cpp b/tests/src/hip_brev.cpp index f722ce1b45..e5c68a5a72 100644 --- a/tests/src/hip_brev.cpp +++ b/tests/src/hip_brev.cpp @@ -94,7 +94,7 @@ int main() { unsigned long long int* deviceD; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); cout << " System minor " << devProp.minor << endl; cout << " System major " << devProp.major << endl; cout << " agent prop name " << devProp.name << endl; diff --git a/tests/src/hip_clz.cpp b/tests/src/hip_clz.cpp index 2cf03148be..18b332b33f 100644 --- a/tests/src/hip_clz.cpp +++ b/tests/src/hip_clz.cpp @@ -118,7 +118,7 @@ int main() { long long int* deviceH; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); cout << " System minor " << devProp.minor << endl; cout << " System major " << devProp.major << endl; cout << " agent prop name " << devProp.name << endl; diff --git a/tests/src/hip_ffs.cpp b/tests/src/hip_ffs.cpp index 5d3ccb7f92..44f172b5f7 100644 --- a/tests/src/hip_ffs.cpp +++ b/tests/src/hip_ffs.cpp @@ -89,7 +89,7 @@ int main() { unsigned long long int* deviceD; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); cout << " System minor " << devProp.minor << endl; cout << " System major " << devProp.major << endl; cout << " agent prop name " << devProp.name << endl; diff --git a/tests/src/hip_popc.cpp b/tests/src/hip_popc.cpp index 0227bdb97c..9c8673891f 100644 --- a/tests/src/hip_popc.cpp +++ b/tests/src/hip_popc.cpp @@ -86,7 +86,7 @@ int main() { unsigned long long int* deviceD; hipDeviceProp_t devProp; - hipDeviceGetProperties(&devProp, 0); + hipGetDeviceProperties(&devProp, 0); cout << " System minor " << devProp.minor << endl; cout << " System major " << devProp.major << endl; cout << " agent prop name " << devProp.name << endl; diff --git a/tests/src/test_common.cpp b/tests/src/test_common.cpp index 3da5568b7c..332c2856d3 100644 --- a/tests/src/test_common.cpp +++ b/tests/src/test_common.cpp @@ -144,7 +144,7 @@ unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) int device; HIPCHECK(hipGetDevice(&device)); hipDeviceProp_t props; - HIPCHECK(hipDeviceGetProperties(&props, device)); + HIPCHECK(hipGetDeviceProperties(&props, device)); unsigned blocks = props.multiProcessorCount * blocksPerCU; if (blocks * threadsPerBlock > N) { diff --git a/util/gedit/hip.lang b/util/gedit/hip.lang index 76f3b84738..21652e22eb 100644 --- a/util/gedit/hip.lang +++ b/util/gedit/hip.lang @@ -226,8 +226,8 @@ hipD3D9SetDirect3DDevice hipD3D9UnmapResources hipD3D9UnregisterResource - hipDeviceGetProperties - hipDeviceSynchronize + hipGetDeviceProperties + hipDeviceSynchronize hipEventCreate hipEventDestroy hipEventElapsedTime diff --git a/util/vim/hip.vim b/util/vim/hip.vim index b64cf53c3f..6f843309c0 100644 --- a/util/vim/hip.vim +++ b/util/vim/hip.vim @@ -92,7 +92,7 @@ syn keyword hipFunctionName hipD3D9ResourceSetMapFlags syn keyword hipFunctionName hipD3D9SetDirect3DDevice syn keyword hipFunctionName hipD3D9UnmapResources syn keyword hipFunctionName hipD3D9UnregisterResource -syn keyword hipFunctionName hipDeviceGetProperties +syn keyword hipFunctionName hipGetDeviceProperties syn keyword hipFunctionName hipDeviceSynchronize syn keyword hipFunctionName hipDeviceReset syn keyword hipFunctionName hipEventCreate From 1f3695b22448a1cdab4d551cc85e165c87c8b33a Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sun, 6 Mar 2016 08:40:33 -0600 Subject: [PATCH 09/20] added flags support to be used later --- src/hip_hcc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index e453fc2069..8f12dd14c6 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2029,7 +2029,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){ hip_status = hipErrorMemoryAllocation; }else{ #if USE_AM_TRACKER - hc::am_memtracker_update(*ptr, device->_device_index, 0); + hc::am_memtracker_update(*ptr, device->_device_index, flags); void *srcPtr; hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr); assert(hsa_status == HSA_STATUS_SUCCESS); From 75952029d6197181fecdcfe6ae03697b4165fb9f Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sun, 6 Mar 2016 12:17:30 -0600 Subject: [PATCH 10/20] added feature for hipHostGetFlags for CUDA and HIP --- include/hcc_detail/hip_runtime_api.h | 9 ++++ include/nvcc_detail/hip_runtime_api.h | 4 ++ src/hip_hcc.cpp | 35 +++++++++++++--- tests/src/CMakeLists.txt | 3 +- tests/src/hipHostGetFlags.cpp | 59 +++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 tests/src/hipHostGetFlags.cpp diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 1588f73a5a..0d83dedd64 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -687,6 +687,15 @@ hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) ; */ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size) ; +/** + * @brief Get flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostAlloc + * @return Error code + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) ; + /** * @brief Free memory allocated by the hcc hip memory allocation API. * This API performs an implicit hipDeviceSynchronize() call. diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 98e7ca004c..783750d44d 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -128,6 +128,10 @@ inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, u return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); } +inline static hipError_ hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr){ + return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); +} + inline static hipError_t hipFreeHost(void* ptr) { return hipCUDAErrorTohipError(cudaFreeHost(ptr)); } diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 8f12dd14c6..a86616a14d 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2008,7 +2008,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){ auto device = ihipGetTlsDefaultDevice(); if(device){ - if(flags | hipHostAllocDefault){ + if(flags & hipHostAllocDefault){ const unsigned am_flags = amHostPinned; *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); @@ -2021,7 +2021,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){ } tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } - if(flags | hipHostAllocMapped && device->_props.canMapHostMemory == 1){ + if(flags & hipHostAllocMapped){ const unsigned am_flags = amHostPinned; *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); @@ -2030,10 +2030,10 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags){ }else{ #if USE_AM_TRACKER hc::am_memtracker_update(*ptr, device->_device_index, flags); - void *srcPtr; - hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr); - assert(hsa_status == HSA_STATUS_SUCCESS); - hc::am_memtracker_add(srcPtr, sizeBytes, device->_acc, false); +// void *srcPtr; +// hsa_status_t hsa_status = hsa_amd_memory_lock((*ptr), sizeBytes, &device->_hsa_agent, 1, &srcPtr); +// assert(hsa_status == HSA_STATUS_SUCCESS); +// hc::am_memtracker_add(srcPtr, sizeBytes, device->_acc, false); #endif } tprintf(TRACE_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); @@ -2067,6 +2067,29 @@ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size){ return ihipLogStatus(hip_status); } +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) +{ + std::call_once(hip_initialized, ihipInit); + hipError_t hip_status = hipSuccess; + + hc::accelerator acc; + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr); + if(status == AM_SUCCESS){ + *flagsPtr = amPointerInfo._appAllocationFlags; + if(*flagsPtr == 0){ + hip_status = hipErrorInvalidValue; + } + else{ + hip_status = hipSuccess; + } + tprintf(TRACE_MEM, " %s: host ptr=%p\n", __func__, hostPtr); + }else{ + hip_status = hipErrorInvalidValue; + } + return ihipLogStatus(hip_status); +} + //--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 4454ac73fb..0b69839e72 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -128,6 +128,7 @@ make_hip_executable (hipMultiThreadStreams1 hipMultiThreadStreams1.cpp) make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp) make_hip_executable (hipHostAlloc hipHostAlloc.cpp) make_hip_executable (hipStreamL5 hipStreamL5.cpp) +make_hip_executable (hipHostGetFlags hipHostGetFlags.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -149,7 +150,7 @@ make_test(hipMultiThreadStreams2 " " ) make_test(hipHostAlloc " ") make_test(hipMemcpy " " ) make_test(hipMemcpyAsync " " ) - +make_test(hipHostGetFlags " ") make_test(hipHcc " " ) make_test(hipStreamL5 " ") diff --git a/tests/src/hipHostGetFlags.cpp b/tests/src/hipHostGetFlags.cpp new file mode 100644 index 0000000000..648c1f2343 --- /dev/null +++ b/tests/src/hipHostGetFlags.cpp @@ -0,0 +1,59 @@ +#include"test_common.h" +#include + +#define LEN 1024*1024 +#define SIZE LEN*sizeof(float) + +__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +Cd[tx] = Ad[tx] + Bd[tx]; +} + +int main(){ +float *A, *B, *C, *D; +float *Ad, *Bd, *Cd, *Dd; +unsigned int FlagA, FlagB, FlagC; +FlagA = hipHostAllocWriteCombined | hipHostAllocMapped; +FlagB = hipHostAllocWriteCombined | hipHostAllocMapped; +FlagC = hipHostAllocMapped; +hipDeviceProp_t prop; +int device; +HIPCHECK(hipGetDevice(&device)); +HIPCHECK(hipGetDeviceProperties(&prop, device)); +if(prop.canMapHostMemory != 1){ +std::cout<<"Exiting..."< Date: Mon, 7 Mar 2016 09:40:15 +0530 Subject: [PATCH 11/20] Fix typo in nvcc_detail/hip_runtime_api.h --- include/nvcc_detail/hip_runtime_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 783750d44d..6da81c742b 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -128,7 +128,7 @@ inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, u return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); } -inline static hipError_ hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr){ +inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr){ return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); } From faaee7e9bf736bd959dc50db9866533ba6506b07 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Mon, 7 Mar 2016 01:40:31 -0600 Subject: [PATCH 12/20] Increased size of arrays for single stream tests --- tests/src/hipStreamL5.cpp | 62 +++++++++++++-------------------------- 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/tests/src/hipStreamL5.cpp b/tests/src/hipStreamL5.cpp index f98718a67d..b9d3a03c94 100644 --- a/tests/src/hipStreamL5.cpp +++ b/tests/src/hipStreamL5.cpp @@ -58,7 +58,6 @@ void test12345(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -71,7 +70,7 @@ void test12345(){ H2HAsync(Bh, Ah, size, stream); H2DAsync(Ad, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2DAsync(Bd, Ad, size, stream); D2HAsync(Ch, Bd, size, stream); HIPCHECK(hipDeviceSynchronize()); @@ -85,7 +84,6 @@ void test13452(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -107,7 +105,7 @@ void test13452(){ H2D(Ad, Dh, size); H2HAsync(Bh, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2DAsync(Bd, Ad, size, stream); D2HAsync(Ch, Bd, size, stream); H2DAsync(Cd, Ch, size, stream); @@ -125,7 +123,6 @@ void test14523(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const int N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -150,7 +147,7 @@ void test14523(){ D2DAsync(Bd, Ad, size, stream); D2HAsync(Ch, Bd, size, stream); H2DAsync(Cd, Ch, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Cd); HIPCHECK(hipDeviceSynchronize()); @@ -165,7 +162,6 @@ void test15234(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -189,7 +185,7 @@ void test15234(){ H2HAsync(Bh, Ah, size, stream); D2HAsync(Ch, Ad, size, stream); H2DAsync(Bd, Ch, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2DAsync(Cd, Bd, size, stream); D2H(Eh, Cd, size); @@ -203,7 +199,6 @@ template void test23451(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -218,12 +213,12 @@ void test23451(){ setArray(Ah, N, T(1)); H2DAsync(Ad, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2DAsync(Bd, Ad, size, stream); D2HAsync(Bh, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); HIPCHECK(hipDeviceSynchronize()); - //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Ah[10] + T(1) == Ch[10]); } template @@ -231,7 +226,6 @@ void test24513(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -256,7 +250,7 @@ void test24513(){ D2DAsync(Bd, Ad, size, stream); D2HAsync(Bh, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Cd); HIPCHECK(hipDeviceSynchronize()); D2H(Eh, Cd, size); @@ -270,7 +264,6 @@ void test25134(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch; @@ -294,7 +287,7 @@ void test25134(){ H2DAsync(Ad, Ah, size, stream); D2HAsync(Bh, Ad, size, stream); H2HAsync(Ch, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2DAsync(Cd, Bd, size, stream); D2H(Eh, Cd, size); @@ -310,7 +303,6 @@ void test21345(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch, *Dh; @@ -328,7 +320,7 @@ void test21345(){ H2DAsync(Ad, Ah, size, stream); H2HAsync(Ch, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2DAsync(Bd, Ad, size, stream); D2HAsync(Dh, Bd, size, stream); @@ -343,7 +335,6 @@ void test34512(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Bh, *Ch, *Dh; @@ -363,7 +354,7 @@ void test34512(){ H2D(Ad, Ah, size); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2DAsync(Bd, Ad, size, stream); D2HAsync(Bh, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); @@ -380,7 +371,6 @@ void test35124(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh; @@ -399,7 +389,7 @@ void test35124(){ H2D(Ad, Dh, size); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); D2HAsync(Ah, Ad, size, stream); H2HAsync(Bh, Ah, size, stream); H2DAsync(Bd, Bh, size, stream); @@ -417,7 +407,6 @@ void test31245(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch; T *Dh, *Eh; @@ -437,7 +426,7 @@ void test31245(){ H2D(Ad, Dh, size); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); H2HAsync(Bh, Ah, size, stream); H2DAsync(Bd, Bh, size, stream); D2DAsync(Cd, Bd, size, stream); @@ -457,7 +446,6 @@ void test32451(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch; @@ -477,7 +465,7 @@ void test32451(){ setArray(Eh, N, T(2)); H2D(Ad, Eh, size); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Ad); H2DAsync(Bd, Ah, size, stream); D2DAsync(Cd, Bd, size, stream); D2HAsync(Bh, Cd, size, stream); @@ -494,7 +482,6 @@ template void test45123(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh; @@ -517,7 +504,7 @@ void test45123(){ D2HAsync(Ah, Bd, size, stream); H2HAsync(Bh, Ah, size, stream); H2DAsync(Cd, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Cd); D2H(Ch, Cd, size); HIPCHECK(hipDeviceSynchronize()); @@ -529,7 +516,6 @@ template void test41235(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh; @@ -550,7 +536,7 @@ void test41235(){ D2DAsync(Bd, Ad, size, stream); D2HAsync(Ah, Bd, size, stream); H2DAsync(Cd, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Cd); D2HAsync(Bh, Cd, size, stream); HIPCHECK(hipDeviceSynchronize()); @@ -563,7 +549,6 @@ void test42351(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch; @@ -586,7 +571,7 @@ void test42351(){ D2DAsync(Bd, Ad, size, stream); H2DAsync(Cd, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Cd); D2HAsync(Bh, Cd, size, stream); H2HAsync(Ch, Bh, size, stream); @@ -602,7 +587,6 @@ void test43512(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh; @@ -622,7 +606,7 @@ void test43512(){ H2D(Ad, Dh, size); D2DAsync(Bd, Ad, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2HAsync(Ah, Bd, size, stream); H2HAsync(Bh, Ah, size, stream); H2DAsync(Cd, Bh, size, stream); @@ -637,7 +621,6 @@ void test51234(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh; @@ -659,7 +642,7 @@ void test51234(){ D2HAsync(Ah, Ad, size, stream); H2HAsync(Bh, Ah, size, stream); H2DAsync(Bd, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2DAsync(Cd, Bd, size, stream); D2H(Ch, Cd, size); @@ -673,7 +656,6 @@ template void test52341(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch; @@ -696,7 +678,7 @@ void test52341(){ D2HAsync(Ah, Ad, size, stream); H2DAsync(Bd, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2DAsync(Cd, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); @@ -712,7 +694,6 @@ template void test53412(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = sizeof(T) * N; T *Ah, *Bh, *Ch, *Dh; @@ -739,7 +720,7 @@ void test53412(){ H2D(Bd, Eh, size); D2HAsync(Ah, Ad, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Bd); D2DAsync(Cd, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); H2DAsync(Dd, Ch, size, stream); @@ -757,7 +738,6 @@ void test54123(){ hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); - const size_t N = 1000; const size_t size = N * sizeof(T); T *Ah, *Bh, *Ch; @@ -787,7 +767,7 @@ void test54123(){ D2DAsync(Cd, Bd, size, stream); H2HAsync(Ch, Bh, size, stream); H2DAsync(Dd, Ch, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/512), dim3(512), 0, stream, Dd); D2H(Fh, Cd, size); D2H(Gh, Dd, size); From de85c80eb071ef4379950421a0c2a07bdf10254c Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Mon, 7 Mar 2016 02:01:25 -0600 Subject: [PATCH 13/20] added copyrights for newly added tests --- tests/src/hipHostAlloc.cpp | 22 ++++++++++++++++++++++ tests/src/hipHostGetFlags.cpp | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tests/src/hipHostAlloc.cpp b/tests/src/hipHostAlloc.cpp index a6401e469d..072582d85f 100644 --- a/tests/src/hipHostAlloc.cpp +++ b/tests/src/hipHostAlloc.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include"test_common.h" #define LEN 1024*1024 diff --git a/tests/src/hipHostGetFlags.cpp b/tests/src/hipHostGetFlags.cpp index 648c1f2343..6a9fd9f6f4 100644 --- a/tests/src/hipHostGetFlags.cpp +++ b/tests/src/hipHostGetFlags.cpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include"test_common.h" #include From 4ed0b1cb1a905f7fd190eefce6cbf0f27453d3d7 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Mon, 7 Mar 2016 03:42:50 -0600 Subject: [PATCH 14/20] Added hipHostRegister feature for CUDA backend and its tests --- include/nvcc_detail/hip_runtime_api.h | 8 +++++++ tests/src/CMakeLists.txt | 3 ++- tests/src/hipHostRegister.cpp | 30 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 tests/src/hipHostRegister.cpp diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 6da81c742b..9a46a773e0 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -132,6 +132,14 @@ inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr){ return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); } +inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags){ + return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags)); +} + +inline static hipError_t hipHostUnregister(void* ptr){ + return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); +} + inline static hipError_t hipFreeHost(void* ptr) { return hipCUDAErrorTohipError(cudaFreeHost(ptr)); } diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 0b69839e72..b4627c9ed4 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -129,6 +129,7 @@ make_hip_executable (hipMultiThreadStreams2 hipMultiThreadStreams2.cpp) make_hip_executable (hipHostAlloc hipHostAlloc.cpp) make_hip_executable (hipStreamL5 hipStreamL5.cpp) make_hip_executable (hipHostGetFlags hipHostGetFlags.cpp) +make_hip_executable (hipHostRegister hipHostRegister.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -152,7 +153,7 @@ make_test(hipMemcpy " " ) make_test(hipMemcpyAsync " " ) make_test(hipHostGetFlags " ") make_test(hipHcc " " ) - +make_test(hipHostRegister " ") make_test(hipStreamL5 " ") make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipHostRegister.cpp b/tests/src/hipHostRegister.cpp new file mode 100644 index 0000000000..241d37a802 --- /dev/null +++ b/tests/src/hipHostRegister.cpp @@ -0,0 +1,30 @@ +#include"test_common.h" + +__global__ void Inc(hipLaunchParm lp, float *Ad){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +Ad[tx] = Ad[tx] + float(1); +} + +int main(){ + float *A, *Ad; + const size_t size = N * sizeof(float); + A = (float*)malloc(size); + HIPCHECK(hipHostRegister(A, size, 0)); + + for(int i=0;i Date: Mon, 7 Mar 2016 10:52:40 -0600 Subject: [PATCH 15/20] Added hipHostRegister flags --- include/nvcc_detail/hip_runtime_api.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/nvcc_detail/hip_runtime_api.h b/include/nvcc_detail/hip_runtime_api.h index 9a46a773e0..3722c74c12 100644 --- a/include/nvcc_detail/hip_runtime_api.h +++ b/include/nvcc_detail/hip_runtime_api.h @@ -55,6 +55,9 @@ hipMemcpyHostToHost #define hipHostAllocMapped cudaHostAllocMapped #define hipHostAllocWriteCombined cudaHostAllocWriteCombined +#define hipHostRegisterPortable cudaHostRegisterPortable +#define hipHostRegisterMapped cudaHostRegisterMapped + typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; //typedef cudaChannelFormatDesc hipChannelFormatDesc; From 216af71480f12ee2fc386c9cae41d8da65ebb802 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 8 Mar 2016 03:14:49 -0600 Subject: [PATCH 16/20] Fixed passing unpinned memory to async memcpy --- src/hip_hcc.cpp | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index a86616a14d..5dd5d301b0 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -68,7 +68,7 @@ THE SOFTWARE. static const int release = 1; -int HIP_LAUNCH_BLOCKING = 0; +int HIP_LAUNCH_BLOCKING = 1; int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; @@ -82,8 +82,8 @@ std::vector g_hip_visible_devices; /* vector of integers that contains the //--- // Chicken bits for disabling functionality to work around potential issues: -int HIP_DISABLE_HW_KERNEL_DEP = 1; -int HIP_DISABLE_HW_COPY_DEP = 1; +int HIP_DISABLE_HW_KERNEL_DEP = 0; +int HIP_DISABLE_HW_COPY_DEP = 0; int HIP_DISABLE_BIDIR_MEMCPY = 0; int HIP_ONESHOT_COPY_DEP = 1; // TODO - setting this =1 is a good thing, reduces input deps @@ -2483,6 +2483,8 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp stream = ihipSyncAndResolveStream(stream); + bool trueAsync = true; + #if USE_AM_TRACKER if (stream) { ihipDevice_t *device = stream->getDevice(); @@ -2501,13 +2503,17 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp memcpy(dst, src, sizeBytes); } else { - if (kind == hipMemcpyDefault) { - hc::accelerator acc; - hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); - hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); - am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); - am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); + hc::accelerator acc; + hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); + am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); + am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); + if(dstAm._appAllocationFlags != 1 || srcAm._appAllocationFlags != 1){ + trueAsync = false; + } + + if (kind == hipMemcpyDefault) { if(statDst == AM_SUCCESS && statSrc == AM_SUCCESS){ if(dstAm._devicePointer != NULL){ if(srcAm._devicePointer != NULL){ @@ -2534,7 +2540,10 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp hsa_signal_store_relaxed(ihip_signal->_hsa_signal, 1); ihipCommand_t copyType; - if ((kind == hipMemcpyHostToDevice) || (kind == hipMemcpyDeviceToDevice)) { + if (kind == hipMemcpyHostToDevice ){ + copyType = ihipCommandCopyH2D; + + }else if(kind == hipMemcpyDeviceToDevice) { copyType = ihipCommandCopyH2D; } else if (kind == hipMemcpyDeviceToHost) { copyType = ihipCommandCopyD2H; @@ -2543,7 +2552,11 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp copyType = ihipCommandCopyD2H; } + + #if USE_ROCR_V2 + if(trueAsync == true){ + hsa_signal_t depSignal; int depSignalCnt = stream->copyCommand(ihip_signal, &depSignal, copyType); @@ -2566,6 +2579,10 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp // TODO-stream - does async-copy fall back to sync if input pointers are not pinned? e = hipErrorInvalidValue; } + } + else { + ihipSyncCopy(stream, dst, src, sizeBytes, kind); + } } } else { e = hipErrorInvalidValue; From 45965aaa2d0266fea2146e4c8e1a7c62615a5229 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 8 Mar 2016 03:17:27 -0600 Subject: [PATCH 17/20] Revert "Fixed passing unpinned memory to async memcpy" This reverts commit 216af71480f12ee2fc386c9cae41d8da65ebb802. --- src/hip_hcc.cpp | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 5dd5d301b0..a86616a14d 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -68,7 +68,7 @@ THE SOFTWARE. static const int release = 1; -int HIP_LAUNCH_BLOCKING = 1; +int HIP_LAUNCH_BLOCKING = 0; int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; @@ -82,8 +82,8 @@ std::vector g_hip_visible_devices; /* vector of integers that contains the //--- // Chicken bits for disabling functionality to work around potential issues: -int HIP_DISABLE_HW_KERNEL_DEP = 0; -int HIP_DISABLE_HW_COPY_DEP = 0; +int HIP_DISABLE_HW_KERNEL_DEP = 1; +int HIP_DISABLE_HW_COPY_DEP = 1; int HIP_DISABLE_BIDIR_MEMCPY = 0; int HIP_ONESHOT_COPY_DEP = 1; // TODO - setting this =1 is a good thing, reduces input deps @@ -2483,8 +2483,6 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp stream = ihipSyncAndResolveStream(stream); - bool trueAsync = true; - #if USE_AM_TRACKER if (stream) { ihipDevice_t *device = stream->getDevice(); @@ -2503,17 +2501,13 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp memcpy(dst, src, sizeBytes); } else { - hc::accelerator acc; - hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); - hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); - am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); - am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); - - if(dstAm._appAllocationFlags != 1 || srcAm._appAllocationFlags != 1){ - trueAsync = false; - } - if (kind == hipMemcpyDefault) { + hc::accelerator acc; + hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); + am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); + am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); + if(statDst == AM_SUCCESS && statSrc == AM_SUCCESS){ if(dstAm._devicePointer != NULL){ if(srcAm._devicePointer != NULL){ @@ -2540,10 +2534,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp hsa_signal_store_relaxed(ihip_signal->_hsa_signal, 1); ihipCommand_t copyType; - if (kind == hipMemcpyHostToDevice ){ - copyType = ihipCommandCopyH2D; - - }else if(kind == hipMemcpyDeviceToDevice) { + if ((kind == hipMemcpyHostToDevice) || (kind == hipMemcpyDeviceToDevice)) { copyType = ihipCommandCopyH2D; } else if (kind == hipMemcpyDeviceToHost) { copyType = ihipCommandCopyD2H; @@ -2552,11 +2543,7 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp copyType = ihipCommandCopyD2H; } - - #if USE_ROCR_V2 - if(trueAsync == true){ - hsa_signal_t depSignal; int depSignalCnt = stream->copyCommand(ihip_signal, &depSignal, copyType); @@ -2579,10 +2566,6 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp // TODO-stream - does async-copy fall back to sync if input pointers are not pinned? e = hipErrorInvalidValue; } - } - else { - ihipSyncCopy(stream, dst, src, sizeBytes, kind); - } } } else { e = hipErrorInvalidValue; From 18c0e69f515614167af400a3fadb3af28f31dfc0 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 8 Mar 2016 03:19:50 -0600 Subject: [PATCH 18/20] v2 added support for unpinned async memcpy --- src/hip_hcc.cpp | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index a86616a14d..313d0ebbd0 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2483,6 +2483,8 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp stream = ihipSyncAndResolveStream(stream); + bool trueAsync = true; + #if USE_AM_TRACKER if (stream) { ihipDevice_t *device = stream->getDevice(); @@ -2501,13 +2503,17 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp memcpy(dst, src, sizeBytes); } else { - if (kind == hipMemcpyDefault) { - hc::accelerator acc; - hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); - hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); - am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); - am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); + hc::accelerator acc; + hc::AmPointerInfo dstAm(NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcAm(NULL, NULL, 0, acc, 0, 0); + am_status_t statDst = hc::am_memtracker_getinfo(&dstAm, dst); + am_status_t statSrc = hc::am_memtracker_getinfo(&srcAm, src); + if(dstAm._appAllocationFlags != 1 || srcAm._appAllocationFlags != 1){ + trueAsync = false; + } + + if (kind == hipMemcpyDefault) { if(statDst == AM_SUCCESS && statSrc == AM_SUCCESS){ if(dstAm._devicePointer != NULL){ if(srcAm._devicePointer != NULL){ @@ -2534,7 +2540,10 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp hsa_signal_store_relaxed(ihip_signal->_hsa_signal, 1); ihipCommand_t copyType; - if ((kind == hipMemcpyHostToDevice) || (kind == hipMemcpyDeviceToDevice)) { + if (kind == hipMemcpyHostToDevice ){ + copyType = ihipCommandCopyH2D; + + }else if(kind == hipMemcpyDeviceToDevice) { copyType = ihipCommandCopyH2D; } else if (kind == hipMemcpyDeviceToHost) { copyType = ihipCommandCopyD2H; @@ -2543,7 +2552,11 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp copyType = ihipCommandCopyD2H; } + + #if USE_ROCR_V2 + if(trueAsync == true){ + hsa_signal_t depSignal; int depSignalCnt = stream->copyCommand(ihip_signal, &depSignal, copyType); @@ -2566,6 +2579,10 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp // TODO-stream - does async-copy fall back to sync if input pointers are not pinned? e = hipErrorInvalidValue; } + } + else { + ihipSyncCopy(stream, dst, src, sizeBytes, kind); + } } } else { e = hipErrorInvalidValue; From cbb42c6b6aed439591c8eb49b35ed1d3747f4a30 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 8 Mar 2016 03:40:56 -0600 Subject: [PATCH 19/20] Added a random test for memcpyAsync --- tests/src/CMakeLists.txt | 4 +- tests/src/hipRandomMemcpyAsync.cpp | 80 ++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 tests/src/hipRandomMemcpyAsync.cpp diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index b4627c9ed4..f8408df4ff 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -130,6 +130,8 @@ make_hip_executable (hipHostAlloc hipHostAlloc.cpp) make_hip_executable (hipStreamL5 hipStreamL5.cpp) make_hip_executable (hipHostGetFlags hipHostGetFlags.cpp) make_hip_executable (hipHostRegister hipHostRegister.cpp) +make_hip_executable (hipRandomMemcpyAsync hipRandomMemcpyAsync.cpp) + target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -155,5 +157,5 @@ make_test(hipHostGetFlags " ") make_test(hipHcc " " ) make_test(hipHostRegister " ") make_test(hipStreamL5 " ") - +make_test(hipRandomMemcpyAsync " ") make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipRandomMemcpyAsync.cpp b/tests/src/hipRandomMemcpyAsync.cpp new file mode 100644 index 0000000000..cc4a5d725a --- /dev/null +++ b/tests/src/hipRandomMemcpyAsync.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include "hip_runtime.h" +#include "test_common.h" + +#define WIDTH 1024 +#define HEIGHT 1024 + +#define NUM (WIDTH*HEIGHT) + +#define THREADS_PER_BLOCK_X 16 +#define THREADS_PER_BLOCK_Y 16 +#define THREADS_PER_BLOCK_Z 1 + +int main() { + + int *hostA; + int *hostB; + + int *deviceA; + int *deviceB; + + int i; + int errors; + + hostA = (int *)malloc(NUM * sizeof(int)); + hostB = (int *)malloc(NUM * sizeof(int)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + hostB[i] = i; + } + + HIPCHECK(hipMalloc((void**)&deviceA, NUM * sizeof(int))); + HIPCHECK(hipMalloc((void**)&deviceB, NUM * sizeof(int))); + + hipStream_t s; + HIPCHECK(hipStreamCreate(&s)); + + + // hostB -> deviceB -> hostA +#define ASYNC 1 +#if ASYNC + HIPCHECK(hipMemcpyAsync(deviceB, hostB, NUM*sizeof(int), hipMemcpyHostToDevice, s)); + HIPCHECK(hipMemcpyAsync(hostA, deviceB, NUM*sizeof(int), hipMemcpyDeviceToHost, s)); +#else + HIPCHECK(hipMemcpy(deviceB, hostB, NUM*sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(hostA, deviceB, NUM*sizeof(int), hipMemcpyDeviceToHost)); +#endif + + HIPCHECK(hipStreamSynchronize(s)); + HIPCHECK(hipDeviceSynchronize()); + + // verify the results + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostA[i] != (hostB[i])) { + errors++; + } + } + + HIPCHECK(hipStreamDestroy(s)); + + HIPCHECK(hipFree(deviceA)); + HIPCHECK(hipFree(deviceB)); + + free(hostA); + free(hostB); + + //hipResetDefaultAccelerator(); + + if(errors != 0){ + HIPASSERT(1 == 2); + }else{ + passed(); + } + + return errors; +} From 102f1733967b342490fcdf9faa46c5b755969837 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 8 Mar 2016 12:57:22 -0600 Subject: [PATCH 20/20] Added hipHostRegister for hip with tests and added copyright --- include/hcc_detail/hip_runtime_api.h | 24 +++++++++++++++ src/hip_hcc.cpp | 45 ++++++++++++++++++++++++++++ tests/src/hipHostRegister.cpp | 26 +++++++++++++++- tests/src/hipKernel.cpp | 19 ++++++++++++ tests/src/hipMemcpyAsync.cpp | 19 ++++++++++++ tests/src/hipRandomMemcpyAsync.cpp | 19 ++++++++++++ tests/src/hipSimpleAtomicsTest.cpp | 19 ++++++++++++ tests/src/hipStream.h | 19 ++++++++++++ tests/src/hip_ballot.cpp | 19 ++++++++++++ tests/src/specialFunc.cu | 19 ++++++++++++ tests/src/test_common.h | 19 ++++++++++++ 11 files changed, 246 insertions(+), 1 deletion(-) diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 0d83dedd64..9f0b55251a 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -61,6 +61,11 @@ extern "C" { #define hipHostAllocMapped 0x2 #define hipHostAllocWriteCombined 0x4 +#define hipHostRegisterDefault 0x0 +#define hipHostRegisterPortable 0x1 +#define hipHostRegisterMapped 0x2 +#define hipHostRegisterIoMemory 0x4 + /** * @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored. */ @@ -696,6 +701,25 @@ hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, size_t size) ; */ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) ; +/** + * @brief Pin host memory + * + * @param[out] hostPtr Pointer to host memory to be pinned + * @param[in] sizeBytes size of the host memory + * @param[in] flags Type of pinning the the host memory + * @return Error code + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) ; + +/** + * @brief Un-pin host pointer + * + * @param[in] hostPtr Pinned Host Pointer + * @return Error code + */ +hipError_t hipHostUnregister(void* hostPtr) ; + + /** * @brief Free memory allocated by the hcc hip memory allocation API. * This API performs an implicit hipDeviceSynchronize() call. diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 313d0ebbd0..4a4dd31b0a 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -2090,6 +2090,51 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) return ihipLogStatus(hip_status); } +hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) +{ + std::call_once(hip_initialized, ihipInit); + hipError_t hip_status = hipSuccess; + + auto device = ihipGetTlsDefaultDevice(); + void* srcPtr; + if(hostPtr == NULL){ + return ihipLogStatus(hipErrorInvalidValue); + } + if(device){ + if(flags == hipHostAllocDefault){ + hsa_status_t hsa_status = hsa_amd_memory_lock(hostPtr, sizeBytes, &device->_hsa_agent, 1, &srcPtr); + if(hsa_status == HSA_STATUS_SUCCESS){ + hip_status = hipSuccess; + }else{ + hip_status = hipErrorMemoryAllocation; + } + } + else if (flags | hipHostRegisterMapped){ + hsa_status_t hsa_status = hsa_amd_memory_lock(hostPtr, sizeBytes, &device->_hsa_agent, 1, &srcPtr); + //TODO: Added feature for actual host pointer being tracked + if(hsa_status != HSA_STATUS_SUCCESS){ + hip_status = hipErrorMemoryAllocation; + } + } + } + return ihipLogStatus(hip_status); +} + +hipError_t hipHostUnregister(void *hostPtr){ + std::call_once(hip_initialized, ihipInit); + hipError_t hip_status = hipSuccess; + if(hostPtr == NULL){ + hip_status = hipErrorInvalidValue; + }else{ + hsa_status_t hsa_status = hsa_amd_memory_unlock(hostPtr); + if(hsa_status != HSA_STATUS_SUCCESS){ + hip_status = hipErrorInvalidValue; +// TODO: Add a different return error. This is not true + } + } + return ihipLogStatus(hip_status); +} + //--- hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { diff --git a/tests/src/hipHostRegister.cpp b/tests/src/hipHostRegister.cpp index 241d37a802..b9e4632369 100644 --- a/tests/src/hipHostRegister.cpp +++ b/tests/src/hipHostRegister.cpp @@ -1,4 +1,24 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include"test_common.h" +#include __global__ void Inc(hipLaunchParm lp, float *Ad){ int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; @@ -8,7 +28,11 @@ Ad[tx] = Ad[tx] + float(1); int main(){ float *A, *Ad; const size_t size = N * sizeof(float); - A = (float*)malloc(size); +#ifdef __HIP_PLATFORM_NVCC__ + A = (float*)malloc(size*2); +#else + A = (float*)memalign(64, size); +#endif HIPCHECK(hipHostRegister(A, size, 0)); for(int i=0;i #include #include diff --git a/tests/src/hipSimpleAtomicsTest.cpp b/tests/src/hipSimpleAtomicsTest.cpp index 975b94d18b..1be32f6679 100644 --- a/tests/src/hipSimpleAtomicsTest.cpp +++ b/tests/src/hipSimpleAtomicsTest.cpp @@ -1,3 +1,22 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // includes, system #include #include diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h index f9ec3472d0..3cf1284671 100644 --- a/tests/src/hipStream.h +++ b/tests/src/hipStream.h @@ -1,3 +1,22 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #ifndef HIPSTREAM_H #define HIPSTREAM_H #include diff --git a/tests/src/hip_ballot.cpp b/tests/src/hip_ballot.cpp index 5af4c32d7b..e1adb3095d 100644 --- a/tests/src/hip_ballot.cpp +++ b/tests/src/hip_ballot.cpp @@ -1,3 +1,22 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include #include diff --git a/tests/src/specialFunc.cu b/tests/src/specialFunc.cu index c5c1931024..085be062d9 100644 --- a/tests/src/specialFunc.cu +++ b/tests/src/specialFunc.cu @@ -1,3 +1,22 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + //Test to ensure hipify runs correctly. // Hipify may report warnings for some missing/unsupported functions diff --git a/tests/src/test_common.h b/tests/src/test_common.h index e37eec7e86..0a2ba96d22 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -1,3 +1,22 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + #include #include #include