diff --git a/projects/clr/hipamd/bin/hipify-perl b/projects/clr/hipamd/bin/hipify-perl index 225bb08887..5bf6a4fe9a 100755 --- a/projects/clr/hipamd/bin/hipify-perl +++ b/projects/clr/hipamd/bin/hipify-perl @@ -198,6 +198,8 @@ sub simpleSubstitutions { $ft{'memory'} += s/\bcuIpcOpenMemHandle\b/hipIpcOpenMemHandle/g; $ft{'memory'} += s/\bcuMemAlloc\b/hipMalloc/g; $ft{'memory'} += s/\bcuMemAllocManaged\b/hipMemAllocManaged/g; + $ft{'memory'} += s/\bcuMemAllocPitch\b/hipMemAllocPitch/g; + $ft{'memory'} += s/\bcuMemAllocPitch_v2\b/hipMemAllocPitch/g; $ft{'memory'} += s/\bcuMemAlloc_v2\b/hipMalloc/g; $ft{'memory'} += s/\bcuMemFree\b/hipFree/g; $ft{'memory'} += s/\bcuMemFreeHost\b/hipHostFree/g; @@ -207,6 +209,8 @@ sub simpleSubstitutions { $ft{'memory'} += s/\bcuMemGetInfo\b/hipMemGetInfo/g; $ft{'memory'} += s/\bcuMemGetInfo_v2\b/hipMemGetInfo/g; $ft{'memory'} += s/\bcuMemHostAlloc\b/hipHostMalloc/g; + $ft{'memory'} += s/\bcuMemAllocHost\b/hipMemAllocHost/g; + $ft{'memory'} += s/\bcuMemAllocHost_v2\b/hipMemAllocHost/g; $ft{'memory'} += s/\bcuMemHostGetDevicePointer\b/hipHostGetDevicePointer/g; $ft{'memory'} += s/\bcuMemHostGetDevicePointer_v2\b/hipHostGetDevicePointer/g; $ft{'memory'} += s/\bcuMemHostGetFlags\b/hipMemHostGetFlags/g; @@ -238,6 +242,10 @@ sub simpleSubstitutions { $ft{'memory'} += s/\bcuMemsetD32_v2\b/hipMemsetD32/g; $ft{'memory'} += s/\bcuMemsetD8\b/hipMemsetD8/g; $ft{'memory'} += s/\bcuMemsetD8_v2\b/hipMemsetD8/g; + $ft{'memory'} += s/\bcuMemsetD8Async\b/hipMemsetD8Async/g; + $ft{'memory'} += s/\bcuMemsetD16\b/hipMemsetD16/g; + $ft{'memory'} += s/\bcuMemsetD16_v2\b/hipMemsetD16/g; + $ft{'memory'} += s/\bcuMemsetD16Async\b/hipMemsetD16Async/g; $ft{'memory'} += s/\bcudaFree\b/hipFree/g; $ft{'memory'} += s/\bcudaFreeArray\b/hipFreeArray/g; $ft{'memory'} += s/\bcudaFreeHost\b/hipHostFree/g; diff --git a/projects/clr/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/projects/clr/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index cbd4ccb818..5a6ca9fc1e 100644 --- a/projects/clr/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/projects/clr/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -857,9 +857,9 @@ | `cuIpcOpenEventHandle` | | | `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` | | `cuMemAlloc` | `hipMalloc` | -| `cuMemAllocHost` | | +| `cuMemAllocHost` | `hipMemAllocHost` | | `cuMemAllocManaged` | `hipMemAllocManaged` | -| `cuMemAllocPitch` | | +| `cuMemAllocPitch` | `hipMemAllocPitch` | | `cuMemcpy` | | | `cuMemcpy2D` | `hipMemcpyParam2D` | | `cuMemcpy2DAsync` | `hipMemcpyParam2DAsync` | @@ -893,8 +893,8 @@ | `cuMemHostGetFlags` | `hipHostGetFlags` | | `cuMemHostRegister` | `hipHostRegister` | | `cuMemHostUnregister` | `hipHostUnregister` | -| `cuMemsetD16` | | -| `cuMemsetD16Async` | | +| `cuMemsetD16` | `hipMemsetD16` | +| `cuMemsetD16Async` | `hipMemsetD16Async` | | `cuMemsetD2D16` | | | `cuMemsetD2D16Async` | | | `cuMemsetD2D32` | | @@ -904,7 +904,7 @@ | `cuMemsetD32` | `hipMemsetD32` | | `cuMemsetD32Async` | `hipMemsetD32Async` | | `cuMemsetD8` | `hipMemsetD8` | -| `cuMemsetD8Async` | | +| `cuMemsetD8Async` | `hipMemsetD8Async` | | `cuMipmappedArrayCreate` | | | `cuMipmappedArrayDestroy` | | | `cuMipmappedArrayGetLevel` | | diff --git a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index 72534a96b6..792d34b3ce 100644 --- a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -162,14 +162,14 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ {"cuMemAlloc", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, {"cuMemAlloc_v2", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, // cudaHostAlloc - {"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocHost", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}}, + {"cuMemAllocHost_v2", {"hipMemAllocHost", "", CONV_MEMORY, API_DRIVER}}, // cudaMallocManaged {"cuMemAllocManaged", {"hipMemAllocManaged", "", CONV_MEMORY, API_DRIVER}}, // no analogue // NOTE: Not equal to cudaMallocPitch due to different signatures - {"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, + {"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, // no analogue // NOTE: Not equal to cudaMemcpy due to different signatures {"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, @@ -271,10 +271,10 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ // cudaHostUnregister {"cuMemHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_DRIVER}}, // no analogue - {"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, + {"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, // no analogue - {"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER}}, // no analogue {"cuMemsetD2D16", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, {"cuMemsetD2D16_v2", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, @@ -299,7 +299,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ {"cuMemsetD8", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, {"cuMemsetD8_v2", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, // no analogue - {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER}}, // no analogue // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index edd63b96f0..1ccc87d216 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -1115,6 +1115,21 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag DEPRECATED("use hipHostMalloc instead") hipError_t hipMallocHost(void** ptr, size_t size); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipMemAllocHost(void** ptr, size_t size); + /** * @brief Allocate device accessible page locked host memory * @@ -1250,6 +1265,30 @@ hipError_t hipHostUnregister(void* hostPtr); hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alighnment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] dptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. + * Given the row and column of an array element of type T, the address is computed as: + * T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * + * @return Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes); + /** * @brief Free memory allocated by the hcc hip memory allocation API. * This API performs an implicit hipDeviceSynchronize() call. @@ -1708,6 +1747,51 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes); */ hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] sizeBytes Data size in bytes + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, hipStream_t stream __dparm(0)); + +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] sizeBytes Data size in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes); + +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] sizeBytes Data size in bytes + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, hipStream_t stream __dparm(0)); + /** * @brief Fills the memory area pointed to by dest with the constant integer * value for specified number of times. diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 20d2fa8842..b788aa5cdc 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -459,6 +459,10 @@ inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height)); } +inline static hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){ + return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes)); +} + inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); } @@ -471,6 +475,12 @@ inline static hipError_t hipMallocHost(void** ptr, size_t size) { return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); } +inline static hipError_t hipMemAllocHost(void** ptr, size_t size) + __attribute__((deprecated("use hipHostMalloc instead"))); +inline static hipError_t hipMemAllocHost(void** ptr, size_t size) { + return hipCUResultTohipError(cuMemAllocHost(ptr, size)); +} + inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute__((deprecated("use hipHostMalloc instead"))); inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) { @@ -761,6 +771,20 @@ inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, s return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); } +inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, + hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream)); +} + +inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) { + return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes)); +} + +inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, + hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream)); +} + inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height)); } diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 43eeac739d..ed1422fcda 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -391,6 +391,8 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) { // Deprecated function: hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); } +// Deprecated function: +hipError_t hipMemAllocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); } // Deprecated function: hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { @@ -465,6 +467,15 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height return ihipLogStatus(hip_status); } +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes){ + HIP_INIT_SPECIAL_API(hipMemAllocPitch, (TRACE_MEM), dptr, pitch, widthInBytes, height,elementSizeBytes); + HIP_SET_DEVICE(); + + if (widthInBytes == 0 || height == 0) return ihipLogStatus(hipErrorInvalidValue); + + return ihipLogStatus(ihipMallocPitch(tls, dptr, pitch, widthInBytes, height, 0)); +} + hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent); HIP_SET_DEVICE(); @@ -1786,28 +1797,65 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp return ihipLogStatus(e); } +hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) { + if (pCopy == nullptr) { + return hipErrorInvalidValue; + } + void* dst; const void* src; + size_t spitch = pCopy->srcPitch; + size_t dpitch = pCopy->dstPitch; + switch(pCopy->srcMemoryType){ + case hipMemoryTypeHost: + src = pCopy->srcHost; + break; + case hipMemoryTypeArray: + src = pCopy->srcArray->data; + spitch = pCopy->WidthInBytes; + break; + case hipMemoryTypeUnified: + case hipMemoryTypeDevice: + src = pCopy->srcDevice; + break; + default: + return hipErrorInvalidValue; + } + switch(pCopy->dstMemoryType){ + case hipMemoryTypeHost: + dst = pCopy->dstHost; + break; + case hipMemoryTypeArray: + dst = pCopy->dstArray->data; + dpitch = pCopy->WidthInBytes; + break; + case hipMemoryTypeUnified: + case hipMemoryTypeDevice: + dst = pCopy->dstDevice; + break; + default: + return hipErrorInvalidValue; + } + if(pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes || pCopy->srcY >= pCopy->Height){ + return hipErrorInvalidValue; + } else if(pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes || pCopy->dstY >= pCopy->Height){ + return hipErrorInvalidValue; + } + src = (void*)((char*)src+pCopy->srcY*pCopy->srcPitch + pCopy->srcXInBytes); + dst = (void*)((char*)dst+pCopy->dstY*pCopy->dstPitch + pCopy->dstXInBytes); + if(isAsync){ + return ihipMemcpy2DAsync(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream); + } else{ + return ihipMemcpy2D(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault); + } +} + hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), pCopy); - hipError_t e = hipSuccess; - if (pCopy == nullptr) { - e = hipErrorInvalidValue; - } else { - e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch, - pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault); - } - return ihipLogStatus(e); + return ihipLogStatus(ihipMemcpyParam2D(pCopy, hipStreamNull, false)); } hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipMemcpyParam2DAsync, (TRACE_MCMD), pCopy, stream); - hipError_t e = hipSuccess; - if (pCopy == nullptr) { - e = hipErrorInvalidValue; - } else { - e = ihipMemcpy2DAsync(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch, - pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream); - } - return ihipLogStatus(e); + return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true)); } // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. @@ -1903,6 +1951,42 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes return ihipLogStatus(e); } +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes , hipStream_t stream ) { + HIP_INIT_SPECIAL_API(hipMemsetD8Async, (TRACE_MCMD), dst, value, sizeBytes, stream); + + stream = ihipSyncAndResolveStream(stream); + if (stream) { + return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar)); + } else { + return ihipLogStatus(hipErrorInvalidValue); + } +} + +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes){ + HIP_INIT_SPECIAL_API(hipMemsetD16, (TRACE_MCMD), dst, value, sizeBytes); + hipError_t e = hipSuccess; + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + if (stream) { + e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort); + if(hipSuccess == e) + stream->locked_wait(); + } else { + e = hipErrorInvalidValue; + } + return ihipLogStatus(e); +} + +hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t sizeBytes, hipStream_t stream ){ + HIP_INIT_SPECIAL_API(hipMemsetD16Async, (TRACE_MCMD), dst, value, sizeBytes, stream); + + stream = ihipSyncAndResolveStream(stream); + if (stream) { + return ihipLogStatus(ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeShort)); + } else { + return ihipLogStatus(hipErrorInvalidValue); + } +} + hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count); diff --git a/projects/clr/hipamd/tests/src/deviceLib/hipTestHalf.cpp b/projects/clr/hipamd/tests/src/deviceLib/hipTestHalf.cpp index 2056cffa91..b78e1d8c63 100644 --- a/projects/clr/hipamd/tests/src/deviceLib/hipTestHalf.cpp +++ b/projects/clr/hipamd/tests/src/deviceLib/hipTestHalf.cpp @@ -234,7 +234,7 @@ void checkFunctional() { int main() { bool* result{nullptr}; - hipHostMalloc(&result, sizeof(result)); + hipMemAllocHost((void**)&result, sizeof(result)); result[0] = false; hipLaunchKernelGGL( diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipArray.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipArray.cpp index c022a40079..9b3b18521e 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipArray.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipArray.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * TEST: %t * HIT_END */ @@ -34,6 +34,21 @@ void printSep() { "======================================================================================\n"); } +inline void initMemCpyParam2D(hip_Memcpy2D &ins, const size_t dpitch, + const size_t spitch, const size_t width, + const size_t height, hipMemoryType dstType, + enum hipMemoryType srcType) { + ins.srcXInBytes=0; + ins.srcY=0; + ins.srcPitch=spitch; + ins.dstXInBytes=0; + ins.dstY=0; + ins.dstPitch=dpitch; + ins.WidthInBytes=width; + ins.Height=height; + ins.dstMemoryType= dstType; + ins.srcMemoryType= srcType; +} //--- // Test copies of a matrix numW by numH @@ -65,7 +80,11 @@ void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost) { unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH); HIPCHECK(hipMemcpy2D(A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy2D(B_d, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice)); + hip_Memcpy2D ins; + initMemCpyParam2D(ins,pitch_B,width,width,numH,hipMemoryTypeDevice,hipMemoryTypeHost); + ins.dstDevice = (hipDeviceptr_t)B_d; + ins.srcHost = B_h; + HIPCHECK(hipMemcpyParam2D(&ins)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, (pitch_C / sizeof(T)) * numH); @@ -115,7 +134,11 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH); HIPCHECK(hipMemcpyToArray(A_d, 0, 0, (void*)A_h, width, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpyToArray(B_d, 0, 0, (void*)B_h, width, hipMemcpyHostToDevice)); + hip_Memcpy2D ins; + initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost); + ins.dstArray = B_d; + ins.srcHost = B_h; + HIPCHECK(hipMemcpyParam2D(&ins)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW); @@ -152,15 +175,21 @@ void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch } else { HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, (void*)A_h, width, width, numH, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy2DToArray(B_d, 0, 0, (void*)B_h, width, width, numH, - hipMemcpyHostToDevice)); + hip_Memcpy2D ins; + initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost); + ins.dstArray = B_d; + ins.srcHost = B_h; + HIPCHECK(hipMemcpyParam2D(&ins)); } hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW * numH); - - HIPCHECK(hipMemcpy2D((void*)C_h, width, (void*)C_d->data, width, width, numH, - hipMemcpyDeviceToHost)); + printf("memcpy srcArray to dstHost\n"); + hip_Memcpy2D ins; + initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeHost,hipMemoryTypeArray); + ins.srcArray = C_d; + ins.dstHost = C_h; + HIPCHECK(hipMemcpyParam2D(&ins)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, numW * numH); @@ -227,8 +256,6 @@ int main(int argc, char* argv[]) { printf("\n\n=== tests&1 (types)\n"); printSep(); HIPCHECK(hipDeviceReset()); - size_t width = N / 6; - size_t height = N / 6; memcpy2Dtest(321, 211, 0); memcpy2Dtest(322, 211, 0); memcpy2Dtest(320, 211, 0); diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset.cpp index 7edfe16b3c..fac83ec003 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset.cpp @@ -26,11 +26,11 @@ THE SOFTWARE. * BUILD: %t %s ../../test_common.cpp * TEST: %t * //Small copy - * TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 + * TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 --memsetD16val 0x10 --memsetD8val 0x1 * // Oddball size - * TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF + * TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF --memsetD16val 0xDEAD --memsetD8val 0xDE * // Big copy - * TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE + * TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA * HIT_END */ @@ -86,6 +86,54 @@ bool testhipMemsetD32(int memsetD32val,int p_gpuDevice) return testResult; } +bool testhipMemsetD16(short memsetD16val,int p_gpuDevice) +{ + size_t Nbytes = N*sizeof(int); + printf ("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", N, memsetD16val, p_gpuDevice); + short *A_d; + short *A_h; + bool testResult = true; + + HIPCHECK ( hipMalloc(&A_d, Nbytes) ); + A_h = (short*)malloc(Nbytes); + HIPCHECK ( hipMemsetD16((hipDeviceptr_t)A_d, memsetD16val, N) ); + HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); + + for (int i=0; i= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD16val argument"); + } + memsetD16val = ex; + } else if (!strcmp(arg, "--memsetD8val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD8val argument"); + } + memsetD8val = ex; } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { failed("Bad iterations argument"); diff --git a/projects/clr/hipamd/tests/src/test_common.h b/projects/clr/hipamd/tests/src/test_common.h index 016cc34d52..73a952b0d1 100644 --- a/projects/clr/hipamd/tests/src/test_common.h +++ b/projects/clr/hipamd/tests/src/test_common.h @@ -109,6 +109,8 @@ THE SOFTWARE. extern size_t N; extern char memsetval; extern int memsetD32val; +extern short memsetD16val; +extern char memsetD8val; extern int iterations; extern unsigned blocksPerCU; extern unsigned threadsPerBlock;