Add hipMemcpy3DAsync (#1320)

* Add hipMemcpy3DAsync

* Fix CI build error

* Move back stream resolution to internal function

* Remove stream redefinition and check
Šī revīzija ir iekļauta:
Rahul Garg
2019-08-15 19:13:16 -07:00
revīziju iesūtīja Maneesh Gupta
vecāks c741ad3c80
revīzija fbc9f7e20a
3 mainīti faili ar 43 papildinājumiem un 6 dzēšanām
@@ -2022,6 +2022,19 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHo
*/
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p);
/**
* @brief Copies data between host and device asynchronously, as described by a 3D copy
* parameter block.
*
* Stream-taking counterpart of hipMemcpy3D: the copy described by @p p is issued on
* @p stream instead of the null stream.
*
* @param[in] p 3D memory copy parameters
* @param[in] stream Stream to use. Declared with __dparm(0); presumably this makes 0 the
* default argument where the language supports defaults -- confirm against the
* definition of __dparm.
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
* #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
*
* @see hipMemcpy3D, hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray,
* hipMemcpyToSymbol, hipMemcpyAsync
*/
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0));
// doxygen end Memory
/**
* @}
@@ -607,6 +607,11 @@ inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
return hipCUDAErrorTohipError(cudaMemcpy3D(p));
}
/**
 * hipMemcpy3DAsync for the CUDA path: forwards the parameter block and stream to
 * cudaMemcpy3DAsync and converts the resulting CUDA error code to a hipError_t.
 */
inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) {
    /* Both arguments are handed through untouched; only the error code is translated. */
    hipError_t status = hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream));
    return status;
}
inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind,
hipStream_t stream) {
+25 -6
Parādīt failu
@@ -1387,10 +1387,9 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t
return ihipLogStatus(e);
}
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p);
hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) {
hipError_t e = hipSuccess;
if (p) {
if(p) {
size_t byteSize;
size_t depth;
size_t height;
@@ -1448,11 +1447,14 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
ySize = p->srcPtr.ysize;
dstPitch = p->dstPtr.pitch;
}
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
stream = ihipSyncAndResolveStream(stream);
hc::completion_future marker;
try {
if((widthInBytes == dstPitch) && (widthInBytes == srcPitch)) {
stream->locked_copySync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind, false);
if(isAsync)
stream->locked_copyAsync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind);
else
stream->locked_copySync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind, false);
} else {
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
@@ -1461,7 +1463,10 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
(unsigned char*)srcPtr + i * ySize * srcPitch + j * srcPitch;
unsigned char* dst =
(unsigned char*)dstPtr + i * height * dstPitch + j * dstPitch;
stream->locked_copySync(dst, src, widthInBytes, p->kind);
if(isAsync)
stream->locked_copyAsync(dst, src, widthInBytes, p->kind);
else
stream->locked_copySync(dst, src, widthInBytes, p->kind);
}
}
}
@@ -1471,6 +1476,20 @@ hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
} else {
e = hipErrorInvalidValue;
}
return e;
}
/**
 * Public entry point for the blocking 3D copy.
 *
 * Delegates to the shared implementation ihipMemcpy3D, requesting the null stream
 * (hipStreamNull) and isAsync = false. The resulting status is passed through
 * ihipLogStatus before being returned to the caller.
 *
 * @param p 3D memory copy parameters, forwarded unchanged to ihipMemcpy3D.
 * @return the status produced by ihipMemcpy3D.
 */
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
    HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p);
    // Synchronous variant: null stream, isAsync = false.
    hipError_t copyStatus = ihipMemcpy3D(p, hipStreamNull, false);
    return ihipLogStatus(copyStatus);
}
/**
 * Public entry point for the stream-based 3D copy.
 *
 * Delegates to the shared implementation ihipMemcpy3D with the caller's stream and
 * isAsync = true. The resulting status is passed through ihipLogStatus before being
 * returned to the caller.
 *
 * @param p 3D memory copy parameters, forwarded unchanged to ihipMemcpy3D.
 * @param stream stream handle forwarded unchanged to ihipMemcpy3D.
 * @return the status produced by ihipMemcpy3D.
 */
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) {
    HIP_INIT_SPECIAL_API(hipMemcpy3DAsync, (TRACE_MCMD), p, stream);
    // Asynchronous variant: caller-supplied stream, isAsync = true.
    hipError_t copyStatus = ihipMemcpy3D(p, stream, true);
    return ihipLogStatus(copyStatus);
}