From e4e9475327e1ee958a41f2ca274b13608e07b537 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 7 Jan 2020 15:59:02 -0500 Subject: [PATCH] P4 to Git Change 2053320 by vsytchen@vsytchen-hip-win10 on 2020/01/07 15:54:34 SWDEV-215533 - [HIP][Windows]Output mismatch with tex3D() 1. Implement hipMemcpy3DAsync(). 2. Add logic in hipMemcpy3D() to determine based on src/dst parameters if the user intended on calling cudaMemcpy3D() or cuMemcpy3D(). ReviewBoardURL = http://ocltc.amd.com/reviews/r/18444/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#38 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#36 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#95 edit [ROCm/hip commit: 3cbd083c8dcb3582cbb4d9b0082130b87df9694d] --- projects/hip/api/hip/hip_hcc.def.in | 1 + projects/hip/api/hip/hip_hcc.map.in | 1 + projects/hip/api/hip/hip_memory.cpp | 344 +++++++++++++++++++++++----- 3 files changed, 284 insertions(+), 62 deletions(-) diff --git a/projects/hip/api/hip/hip_hcc.def.in b/projects/hip/api/hip/hip_hcc.def.in index 6aba1f5a25..fd51a5701a 100644 --- a/projects/hip/api/hip/hip_hcc.def.in +++ b/projects/hip/api/hip/hip_hcc.def.in @@ -87,6 +87,7 @@ hipMemcpy2D hipMemcpy2DAsync hipMemcpy2DToArray hipMemcpy3D +hipMemcpy3DAsync hipMemcpyAsync hipMemcpyDtoD hipMemcpyDtoDAsync diff --git a/projects/hip/api/hip/hip_hcc.map.in b/projects/hip/api/hip/hip_hcc.map.in index f80d7db23a..c0e2bf9458 100644 --- a/projects/hip/api/hip/hip_hcc.map.in +++ b/projects/hip/api/hip/hip_hcc.map.in @@ -88,6 +88,7 @@ global: hipMemcpy2DAsync; hipMemcpy2DToArray; hipMemcpy3D; + hipMemcpy3DAsync; hipMemcpyAsync; hipMemcpyDtoD; hipMemcpyDtoDAsync; diff --git a/projects/hip/api/hip/hip_memory.cpp b/projects/hip/api/hip/hip_memory.cpp index 11e08384fc..6b23913386 100644 --- a/projects/hip/api/hip/hip_memory.cpp +++ b/projects/hip/api/hip/hip_memory.cpp @@ -1107,95 +1107,315 @@ hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t HIP_RETURN(hipSuccess); } -hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { - HIP_INIT_API(hipMemcpy3D, p); - - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); - - size_t byteSize; - size_t srcPitchInBytes; - size_t dstPitchInbytes; - void* srcPtr; - void* dstPtr; - size_t srcOrigin[3]; - size_t dstOrigin[3]; - size_t region[3]; - - region[2] = p->extent.depth; - region[1] = p->extent.height; - region[0] = p->extent.width; - srcOrigin[0] = p->srcPos.x; - srcOrigin[1] = p->srcPos.y; - srcOrigin[2] = p->srcPos.z; - dstOrigin[0] = p->dstPos.x; - dstOrigin[1] = p->dstPos.y; - dstOrigin[2] = p->dstPos.z; - - if (p->dstArray != nullptr) { - getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &byteSize); - dstPitchInbytes = p->dstArray->width * byteSize; - srcPitchInBytes = p->srcPtr.pitch; - srcPtr = (void*)p->srcPtr.ptr; - dstPtr = p->dstArray->data; +hipError_t ihipMemcpy3D_V1(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + const void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if (p->srcMemoryType == hipMemoryTypeHost) { + srcPtr = p->srcHost; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if ((p->srcMemoryType == hipMemoryTypeDevice) || + (p->srcMemoryType == hipMemoryTypeUnified)) { + srcPtr = p->srcDevice; + srcRowPitchInBytes = p->srcPitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcHeight; + } else if (p->srcMemoryType == hipMemoryTypeArray) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; } else { - srcPitchInBytes = p->srcPtr.pitch; - dstPitchInbytes = p->dstPtr.pitch; - srcPtr = p->srcPtr.ptr; - dstPtr = p->dstPtr.ptr; + ShouldNotReachHere(); } - // Create buffer rectangle info structure + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if (p->dstMemoryType == hipMemoryTypeHost) { + dstPtr = p->dstHost; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if ((p->dstMemoryType == hipMemoryTypeDevice) || + (p->dstMemoryType == hipMemoryTypeUnified)) { + dstPtr = p->dstDevice; + dstRowPitchInBytes = p->dstPitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstHeight; + } else if (p->dstMemoryType == hipMemoryTypeArray) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else { + ShouldNotReachHere(); + } + + // For HIP arrays, srcXInBytes must be evenly divisible by the array element size. + if ((p->srcMemoryType == hipMemoryTypeArray) && + ((p->srcXInBytes % srcElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcPitch < (p->WidthInBytes + p->srcXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->srcMemoryType != hipMemoryTypeArray) && + (p->srcHeight < (p->Height + p->srcY))) { + return hipErrorInvalidValue; + } + + // For HIP arrays, dstXInBytes must be evenly divisible by the array element size. + if ((p->dstMemoryType == hipMemoryTypeArray) && + ((p->dstXInBytes % dstElementSizeInBytes) != 0)) { + return hipErrorInvalidValue; + } + + // If specified, srcPitch must be greater than or equal to WidthInBytes + srcXInBytes + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstPitch < (p->WidthInBytes + p->dstXInBytes))) { + return hipErrorInvalidValue; + } + + // If specified, srcHeight must be greater than or equal to Height + srcY + if ((p->dstMemoryType != hipMemoryTypeArray) && + (p->dstHeight < (p->Height + p->dstY))) { + return hipErrorInvalidValue; + } + + // The srcLOD and dstLOD members of the CUDA_MEMCPY3D structure must be set to 0. + if ((p->srcLOD != 0) || (p->dstLOD != 0)) { + return hipErrorInvalidValue; + } + + size_t region[3]; + region[0] = p->WidthInBytes; + region[1] = p->Height; + region[2] = p->Depth; + + size_t srcOrigin[3]; + srcOrigin[0] = p->srcXInBytes; + srcOrigin[1] = p->srcY; + srcOrigin[2] = p->srcZ; + + size_t dstOrigin[3]; + dstOrigin[0] = p->dstXInBytes; + dstOrigin[1] = p->dstY; + dstOrigin[2] = p->dstZ; + amd::BufferRect srcRect; - amd::BufferRect dstRect; - size_t offset = 0; - amd::Memory* srcMemory = getMemoryObject(srcPtr, offset); - assert(offset == 0); - amd::Memory* dstMemory = getMemoryObject(dstPtr, offset); - assert(offset == 0); - - size_t src_slice_pitch = srcPitchInBytes * p->extent.height; - size_t dst_slice_pitch = dstPitchInbytes * p->extent.height; - - if (!srcRect.create(srcOrigin, region, srcPitchInBytes, src_slice_pitch) || - !dstRect.create(dstOrigin, region, dstPitchInbytes, dst_slice_pitch)) { - HIP_RETURN(hipErrorInvalidValue); + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; } - hipMemcpyKind kind = p->kind; + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); amd::Command* command = nullptr; amd::Command::EventWaitList waitList; - - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D size(region[0], region[1], region[2]); + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); if (((srcMemory == nullptr) && (dstMemory == nullptr)) || - (kind == hipMemcpyHostToHost)) { + (p->kind == hipMemcpyHostToHost)) { memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); - HIP_RETURN(hipSuccess); + return hipSuccess; } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, - *dstMemory->asBuffer(), srcStart, size, srcPtr, srcRect, dstRect); + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, - *srcMemory->asBuffer(), srcStart, size, dstPtr, srcRect, dstRect); + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, - *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, size, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, srcRect, dstRect); } if (command == nullptr) { - HIP_RETURN(hipErrorOutOfMemory); + return hipErrorOutOfMemory; } command->enqueue(); - command->awaitCompletion(); + if (!isAsync) { + command->awaitCompletion(); + } command->release(); - HIP_RETURN(hipSuccess); + return hipSuccess; +} + +hipError_t ihipMemcpy3D_V2(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + void* srcPtr = nullptr; + size_t srcElementSizeInBytes = sizeof(unsigned char); + size_t srcRowPitchInBytes = 0; + size_t srcSlicePitchInBytes = 0; + if ((p->srcArray != nullptr) && (p->srcPtr.ptr == nullptr)) { + srcPtr = p->srcArray->data; + getByteSizeFromChannelFormatKind(p->srcArray->desc.f, &srcElementSizeInBytes); + srcElementSizeInBytes *= p->srcArray->NumChannels; + srcRowPitchInBytes = srcElementSizeInBytes * p->srcArray->width; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcArray->height; + } else if ((p->srcArray == nullptr) && (p->srcPtr.ptr != nullptr)) { + srcPtr = p->srcPtr.ptr; + srcRowPitchInBytes = p->srcPtr.pitch; + srcSlicePitchInBytes = srcRowPitchInBytes * p->srcPtr.ysize; + } else { + ShouldNotReachHere(); + } + + void* dstPtr = nullptr; + size_t dstElementSizeInBytes = sizeof(unsigned char); + size_t dstRowPitchInBytes = 0; + size_t dstSlicePitchInBytes = 0; + if ((p->dstArray != nullptr) && (p->dstPtr.ptr == nullptr)) { + dstPtr = p->dstArray->data; + getByteSizeFromChannelFormatKind(p->dstArray->desc.f, &dstElementSizeInBytes); + dstElementSizeInBytes *= p->dstArray->NumChannels; + dstRowPitchInBytes = dstElementSizeInBytes * p->dstArray->width; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstArray->height; + } else if ((p->dstArray == nullptr) && (p->dstPtr.ptr != nullptr)) { + dstPtr = p->dstPtr.ptr; + dstRowPitchInBytes = p->srcPtr.pitch; + dstSlicePitchInBytes = dstRowPitchInBytes * p->dstPtr.ysize; + } else { + ShouldNotReachHere(); + } + + // If the source and destination are both arrays, they must have the same element size. + if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) && + (srcElementSizeInBytes != dstElementSizeInBytes)) { + return hipErrorInvalidValue; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. + // If no HIP array is participating in the copy, the extent is defined in elements of unsigned char. + size_t region[3]; + if (p->srcArray != nullptr) { + region[0] = srcRowPitchInBytes; + } else if (p->dstArray != nullptr) { + region[0] = dstRowPitchInBytes; + } else { + region[0] = sizeof(unsigned char) * p->extent.width; + } + region[1] = p->extent.height; + region[2] = p->extent.depth; + + // The offset into the object is defined in units of the object's elements. + size_t srcOrigin[3]; + srcOrigin[0] = srcElementSizeInBytes * p->srcPos.x; + srcOrigin[1] = p->srcPos.y; + srcOrigin[2] = p->srcPos.z; + + amd::BufferRect srcRect; + if (!srcRect.create(srcOrigin, region, srcRowPitchInBytes, srcSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t srcMemoryOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcPtr, srcMemoryOffset); + amd::Coord3D srcStart(srcRect.start_ + srcMemoryOffset, 0, 0); + + size_t dstOrigin[3]; + dstOrigin[0] = dstElementSizeInBytes * p->dstPos.x; + dstOrigin[1] = p->dstPos.y; + dstOrigin[2] = p->dstPos.z; + + amd::BufferRect dstRect; + if (!dstRect.create(dstOrigin, region, dstRowPitchInBytes, dstSlicePitchInBytes)) { + return hipErrorInvalidValue; + } + + size_t dstMemoryOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstPtr, dstMemoryOffset); + amd::Coord3D dstStart(dstRect.start_ + dstMemoryOffset, 0, 0); + + amd::Command* command = nullptr; + amd::Command::EventWaitList waitList; + amd::HostQueue* queue = hip::getQueue(stream); + amd::Coord3D regionSize(region[0], region[1], region[2]); + + if (((srcMemory == nullptr) && (dstMemory == nullptr)) || + (p->kind == hipMemcpyHostToHost)) { + memcpy(dstPtr, srcPtr, region[0] * region[1] * region[2]); + return hipSuccess; + } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + command = new amd::WriteMemoryCommand(*queue, CL_COMMAND_WRITE_BUFFER_RECT, waitList, + *dstMemory->asBuffer(), srcStart, regionSize, srcPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + command = new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, waitList, + *srcMemory->asBuffer(), srcStart, regionSize, dstPtr, srcRect, dstRect); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + command = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_RECT, waitList, + *srcMemory->asBuffer(),*dstMemory->asBuffer(), srcStart, dstStart, regionSize, + srcRect, dstRect); + } + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + + return hipSuccess; +} + +hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { + // Having src and dst be an array is ambigous, since we can't tell if the user intended to call hipMemcpy3D_V1() or hipMemcpy3D_V2(). + // For now hope that we never encounter this case. + assert((p->srcArray == nullptr) || (p->dstArray == nullptr)); + + // When calling hipMemcpy3D_V1(), the user must specify + // one of srcHost, srcDevice or srcArray and + // one of dstHost, dstDevice or dstArray. + if (((p->srcHost != nullptr) || (p->srcDevice != nullptr) || (p->srcArray != nullptr)) && + ((p->dstHost != nullptr) || (p->dstDevice != nullptr) || (p->dstArray != nullptr))) { + return ihipMemcpy3D_V1(p, stream, isAsync); + } + + // When calling hipMemcpy3D_V2(), the user must specify + // one of srcArray or srcPtr and + // one of dstArray or dstPtr. + if (((p->srcArray != nullptr) || (p->srcPtr.ptr != nullptr)) && + ((p->dstArray != nullptr) || (p->dstPtr.ptr != nullptr))) { + return ihipMemcpy3D_V2(p, stream, isAsync); + } + + // If we got here, then the user specified an invalid combination of src/dst parameters. + return hipErrorInvalidValue; +} + +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) { + HIP_INIT_API(hipMemcpy3D, p); + + HIP_RETURN(ihipMemcpy3D(p, nullptr)); +} + +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + + HIP_RETURN(ihipMemcpy3D(p, stream, true)); } hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes,