From 7ebbbd35254be59b305bbca10e8a4b472eaedf30 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 27 Feb 2019 15:42:54 +0000 Subject: [PATCH 1/6] Add hipMemsetD32 and hipMemsetD32Async Add 2 extra memset functions which fills memory with integer-typed data Also change the parameters of ihipMemset to better explain the semantic --- include/hip/hcc_detail/hip_runtime_api.h | 28 ++++++++++++++ src/hip_memory.cpp | 48 +++++++++++++++++++----- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index b6ae88729a..9c50ca4755 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -1504,6 +1504,17 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes); */ hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes); +/** + * @brief Fills the memory area pointed to by dest with the constant integer + * value for specified number of times. + * + * @param[out] dst Data being filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count); + /** * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant * byte value value. @@ -1521,6 +1532,23 @@ hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeByte */ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dev with the constant integer + * value for specified number of times. + * + * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value - Value to set for each byte of specified memory + * @param[in] count - number of values to be set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemsetD32Async(void* dst, int value, size_t count, hipStream_t stream __dparm(0)); + /** * @brief Fills the memory area pointed to by dst with the constant value. * diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 8e29bee68e..b194faa1ca 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1508,13 +1508,13 @@ __global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, } // namespace template -void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { +void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t count) { static constexpr uint32_t block_dim = 256; - const uint32_t grid_dim = clamp_integer(sizeBytes / block_dim, 1, UINT32_MAX); + const uint32_t grid_dim = clamp_integer(count / block_dim, 1, UINT32_MAX); hipLaunchKernelGGL(hip_fill_n, dim3(grid_dim), dim3{block_dim}, 0u, stream, ptr, - sizeBytes, std::move(val)); + count, std::move(val)); } template @@ -1533,20 +1533,20 @@ typedef enum ihipMemsetDataType { ihipMemsetDataTypeInt = 2 }ihipMemsetDataType; -hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t stream, enum ihipMemsetDataType copyDataType ) +hipError_t ihipMemset(void* dst, int value, size_t count, hipStream_t stream, enum ihipMemsetDataType copyDataType ) { hipError_t e = hipSuccess; - if (sizeBytes == 0) return e; + if (count == 0) return e; if (stream && (dst != NULL)) { if(copyDataType == ihipMemsetDataTypeChar){ - if ((sizeBytes & 0x3) == 0) { + if ((count & 0x3) == 0) { // use a faster dword-per-workitem copy: try { value = value & 0xff; uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; - ihipMemsetKernel (stream, static_cast (dst), value32, sizeBytes/sizeof(uint32_t)); + ihipMemsetKernel (stream, static_cast (dst), value32, count/sizeof(uint32_t)); } catch (std::exception &ex) { e = hipErrorInvalidValue; @@ -1554,7 +1554,7 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea } else { // use a slow byte-per-workitem copy: try { - ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + ihipMemsetKernel (stream, static_cast (dst), value, count); } catch (std::exception &ex) { e = hipErrorInvalidValue; @@ -1563,14 +1563,14 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea } else { if(copyDataType == ihipMemsetDataTypeInt) { // 4 Bytes value try { - ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + ihipMemsetKernel (stream, static_cast (dst), value, count); } catch (std::exception &ex) { e = hipErrorInvalidValue; } } else if(copyDataType == ihipMemsetDataTypeShort) { try { value = value & 0xffff; - ihipMemsetKernel (stream, static_cast (dst), value, sizeBytes); + ihipMemsetKernel (stream, static_cast (dst), value, count); } catch (std::exception &ex) { e = hipErrorInvalidValue; } @@ -1719,6 +1719,18 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st return ihipLogStatus(e); }; +hipError_t hipMemsetD32Async(void* dst, int value, size_t count, hipStream_t stream) { + HIP_INIT_SPECIAL_API(hipMemsetD32Async, (TRACE_MCMD), dst, value, count, stream); + + hipError_t e = hipSuccess; + + stream = ihipSyncAndResolveStream(stream); + + e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeInt); + + return ihipLogStatus(e); +}; + hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_SPECIAL_API(hipMemset, (TRACE_MCMD), dst, value, sizeBytes); @@ -1787,6 +1799,22 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes return ihipLogStatus(e); } +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count); + + hipError_t e = hipSuccess; + + hipStream_t stream = hipStreamNull; + stream = ihipSyncAndResolveStream(stream); + if (stream) { + e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeInt); + stream->locked_wait(); + } else { + e = hipErrorInvalidValue; + } + return ihipLogStatus(e); +} + hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ) { HIP_INIT_SPECIAL_API(hipMemset3D, (TRACE_MCMD), &pitchedDevPtr, value, &extent); From bfde8a7fab262f23079a13a7639986332644d88a Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Mon, 4 Mar 2019 17:11:54 +0000 Subject: [PATCH 2/6] Add direct test for hipMemsetD32 and hipMemsetD32Async --- tests/src/runtimeApi/memory/hipMemset.cpp | 61 +++++++++++++++++++++-- tests/src/test_common.cpp | 7 +++ tests/src/test_common.h | 1 + 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/tests/src/runtimeApi/memory/hipMemset.cpp b/tests/src/runtimeApi/memory/hipMemset.cpp index bcb0adbeef..8ac5beaf55 100644 --- a/tests/src/runtimeApi/memory/hipMemset.cpp +++ b/tests/src/runtimeApi/memory/hipMemset.cpp @@ -26,11 +26,11 @@ THE SOFTWARE. * BUILD: %t %s ../../test_common.cpp * RUN: %t * //Small copy - * RUN: %t -N 10 --memsetval 0x42 + * RUN: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 * // Oddball size - * RUN: %t -N 10013 --memsetval 0x5a + * RUN: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF * // Big copy - * RUN: %t -N 256M --memsetval 0xa6 + * RUN: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE * HIT_END */ @@ -62,6 +62,30 @@ bool testhipMemset(int memsetval,int p_gpuDevice) return testResult; } +bool testhipMemsetD32(int memsetD32val,int p_gpuDevice) +{ + size_t Nbytes = N*sizeof(int); + printf ("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n", N, memsetD32val, p_gpuDevice); + int *A_d; + int *A_h; + bool testResult = true; + + HIPCHECK ( hipMalloc(&A_d, Nbytes) ); + A_h = (int*)malloc(Nbytes); + HIPCHECK ( hipMemsetD32(A_d, memsetD32val, N) ); + HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); + + for (int i=0; i= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD32val argument"); + } + memsetD32val = ex; } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { failed("Bad iterations argument"); diff --git a/tests/src/test_common.h b/tests/src/test_common.h index bacbf35a22..8820381826 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -98,6 +98,7 @@ THE SOFTWARE. // standard command-line variables: extern size_t N; extern char memsetval; +extern int memsetD32val; extern int iterations; extern unsigned blocksPerCU; extern unsigned threadsPerBlock; From 0337b686ef68b701717af05278b701c3a3916fa4 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Mon, 4 Mar 2019 20:11:12 -0800 Subject: [PATCH 3/6] Add implementation for NVCC path --- include/hip/nvcc_detail/hip_runtime_api.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 02c4b7ee61..abce9a5fc6 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -655,11 +655,20 @@ inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); } +inline static hipError_t hipMemsetD32(void* devPtr, int value, size_t count) { + return hipCUDAErrorTohipError(cuMemsetD32(devPtr, value, count)); +} + inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); } +inline static hipError_t hipMemsetD32Async(void* devPtr, int value, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cuMemsetD32Async(devPtr, value, count, stream)); +} + inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) { return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); } From 5cbd28f29b7e55695023c9110d71eee0f2b4b834 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Tue, 5 Mar 2019 05:51:05 +0000 Subject: [PATCH 4/6] Address code review comments to use hipDeviceptr_t --- include/hip/hcc_detail/hip_runtime_api.h | 3 ++- include/hip/nvcc_detail/hip_runtime_api.h | 6 +++--- src/hip_memory.cpp | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 9c50ca4755..73996982d1 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -1547,7 +1547,8 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st * @param[in] stream - Stream identifier * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree */ -hipError_t hipMemsetD32Async(void* dst, int value, size_t count, hipStream_t stream __dparm(0)); +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream __dparm(0)); /** * @brief Fills the memory area pointed to by dst with the constant value. diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index abce9a5fc6..a463d17e6d 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -655,7 +655,7 @@ inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); } -inline static hipError_t hipMemsetD32(void* devPtr, int value, size_t count) { +inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) { return hipCUDAErrorTohipError(cuMemsetD32(devPtr, value, count)); } @@ -664,8 +664,8 @@ inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); } -inline static hipError_t hipMemsetD32Async(void* devPtr, int value, size_t count, - hipStream_t stream __dparm(0)) { +inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count, + hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cuMemsetD32Async(devPtr, value, count, stream)); } diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index b194faa1ca..d8a9bd5708 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1719,7 +1719,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t st return ihipLogStatus(e); }; -hipError_t hipMemsetD32Async(void* dst, int value, size_t count, hipStream_t stream) { +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipMemsetD32Async, (TRACE_MCMD), dst, value, count, stream); hipError_t e = hipSuccess; From 8db717c769b591bed5fb279e3259922ce964f977 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 5 Mar 2019 12:10:01 +0530 Subject: [PATCH 5/6] Update hip_runtime_api.h Use hipCUResultTohipError instead of hipCUDAErrorTohipError in hipMemsetD32 & hipMemsetD32Async. --- include/hip/nvcc_detail/hip_runtime_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index a463d17e6d..add4c3f238 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -656,7 +656,7 @@ inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { } inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) { - return hipCUDAErrorTohipError(cuMemsetD32(devPtr, value, count)); + return hipCUResultTohipError(cuMemsetD32(devPtr, value, count)); } inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, @@ -666,7 +666,7 @@ inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cuMemsetD32Async(devPtr, value, count, stream)); + return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream)); } inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) { From 51f29b9cee9633b8f7e90f5eb616a9adae188d29 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 5 Mar 2019 12:11:11 +0530 Subject: [PATCH 6/6] Update hipMemset.cpp Address build issues on nvcc path. --- tests/src/runtimeApi/memory/hipMemset.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/src/runtimeApi/memory/hipMemset.cpp b/tests/src/runtimeApi/memory/hipMemset.cpp index 8ac5beaf55..abff987437 100644 --- a/tests/src/runtimeApi/memory/hipMemset.cpp +++ b/tests/src/runtimeApi/memory/hipMemset.cpp @@ -72,7 +72,7 @@ bool testhipMemsetD32(int memsetD32val,int p_gpuDevice) HIPCHECK ( hipMalloc(&A_d, Nbytes) ); A_h = (int*)malloc(Nbytes); - HIPCHECK ( hipMemsetD32(A_d, memsetD32val, N) ); + HIPCHECK ( hipMemsetD32((hipDeviceptr_t)A_d, memsetD32val, N) ); HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); for (int i=0; i