diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 8db9e23317..97ff4bd1d7 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -2022,6 +2022,19 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo */ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] p 3D memory copy parameters + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); + // doxygen end Memory /** * @} diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index e1ff25d9f2..33dbcb82f3 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -607,6 +607,11 @@ inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) return hipCUDAErrorTohipError(cudaMemcpy3D(p)); } +inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) +{ + return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream)); +} + inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index ccea09bb39..d7cd9f32d8 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -1387,10 +1387,9 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t return ihipLogStatus(e); } -hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { - HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p); +hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) { hipError_t e = hipSuccess; - if (p) { + if(p) { size_t byteSize; size_t depth; size_t height; @@ -1448,11 +1447,14 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { ySize = p->srcPtr.ysize; dstPitch = p->dstPtr.pitch; } - hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + stream = ihipSyncAndResolveStream(stream); hc::completion_future marker; try { if((widthInBytes == dstPitch) && (widthInBytes == srcPitch)) { - stream->locked_copySync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind, false); + if(isAsync) + stream->locked_copyAsync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind); + else + stream->locked_copySync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind, false); } else { for (int i = 0; i < depth; i++) { for (int j = 0; j < height; j++) { @@ -1461,7 +1463,10 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { (unsigned char*)srcPtr + i * ySize * srcPitch + j * srcPitch; unsigned char* dst = (unsigned char*)dstPtr + i * height * dstPitch + j * dstPitch; - stream->locked_copySync(dst, src, widthInBytes, p->kind); + if(isAsync) + stream->locked_copyAsync(dst, src, widthInBytes, p->kind); + else + stream->locked_copySync(dst, src, widthInBytes, p->kind); } } } @@ -1471,6 +1476,20 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { } else { e = hipErrorInvalidValue; } + return e; +} + +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { + HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p); + hipError_t e = hipSuccess; + e = ihipMemcpy3D(p, hipStreamNull, false); + return ihipLogStatus(e); +} + +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_SPECIAL_API(hipMemcpy3DAsync, (TRACE_MCMD), p, stream); + hipError_t e = hipSuccess; + e = ihipMemcpy3D(p, stream, true); return ihipLogStatus(e); }